{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 14.951603498542275,
  "eval_steps": 500,
  "global_step": 1600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3733.0,
      "completions/mean_length": 614.185302734375,
      "completions/mean_terminated_length": 534.6917724609375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "epoch": 0.009329446064139942,
      "grad_norm": 0.16490904986858368,
      "learning_rate": 1e-06,
      "loss": 0.011,
      "num_tokens": 554894.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.2727942168712616,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 1
    },
    {
      "clip_ratio/high_max": 0.0023652190211578272,
      "clip_ratio/high_mean": 0.0010097824670083355,
      "clip_ratio/low_mean": 0.0006767770846636267,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001686559604422655,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2135.0,
      "completions/mean_length": 582.6842041015625,
      "completions/mean_terminated_length": 530.959228515625,
      "completions/min_length": 43.0,
      "completions/min_terminated_length": 43.0,
      "epoch": 0.018658892128279883,
      "grad_norm": 0.1396065652370453,
      "learning_rate": 1e-06,
      "loss": 0.0003,
      "num_tokens": 1110987.0,
      "reward": 0.4654017984867096,
      "reward_std": 0.22120505571365356,
      "rewards/verify_math_reward/mean": 0.4654017984867096,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 2
    },
    {
      "clip_ratio/high_max": 0.002458824819768779,
      "clip_ratio/high_mean": 0.0009520601961412467,
      "clip_ratio/low_mean": 0.0006422844571716269,
      "clip_ratio/low_min": 1.4744043255632278e-05,
      "clip_ratio/region_mean": 0.001594344670593273,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2504.0,
      "completions/mean_length": 560.5580444335938,
      "completions/mean_terminated_length": 532.7199096679688,
      "completions/min_length": 32.0,
      "completions/min_terminated_length": 32.0,
      "epoch": 0.027988338192419825,
      "grad_norm": 0.13528741896152496,
      "learning_rate": 1e-06,
      "loss": 0.0005,
      "num_tokens": 1683119.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.22019615769386292,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 3
    },
    {
      "clip_ratio/high_max": 0.002840214430761989,
      "clip_ratio/high_mean": 0.0011048212909372523,
      "clip_ratio/low_mean": 0.0006483338529506,
      "clip_ratio/low_min": 1.4692054719489533e-05,
      "clip_ratio/region_mean": 0.0017531551493448205,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3404.0,
      "completions/mean_length": 575.1796875,
      "completions/mean_terminated_length": 535.4413452148438,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 0.037317784256559766,
      "grad_norm": 0.14472410082817078,
      "learning_rate": 1e-06,
      "loss": 0.0054,
      "num_tokens": 2236952.0,
      "reward": 0.5301339626312256,
      "reward_std": 0.25397443771362305,
      "rewards/verify_math_reward/mean": 0.5301339030265808,
      "rewards/verify_math_reward/std": 0.49936985969543457,
      "step": 4
    },
    {
      "clip_ratio/high_max": 0.0023668790236115456,
      "clip_ratio/high_mean": 0.0011392345841159113,
      "clip_ratio/low_mean": 0.0007667491390748182,
      "clip_ratio/low_min": 4.886347505816957e-05,
      "clip_ratio/region_mean": 0.0019059837286476977,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3572.0,
      "completions/mean_length": 562.0592041015625,
      "completions/mean_terminated_length": 534.2328491210938,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "epoch": 0.04664723032069971,
      "grad_norm": 0.1373066008090973,
      "learning_rate": 1e-06,
      "loss": -0.0007,
      "num_tokens": 2795453.0,
      "reward": 0.504464328289032,
      "reward_std": 0.2638465166091919,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5002593398094177,
      "step": 5
    },
    {
      "clip_ratio/high_max": 0.0022958667468628846,
      "clip_ratio/high_mean": 0.0010660433836164884,
      "clip_ratio/low_mean": 0.0009721827082103118,
      "clip_ratio/low_min": 0.00018187476507591782,
      "clip_ratio/region_mean": 0.0020382261063787155,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3899.0,
      "completions/mean_length": 616.1272583007812,
      "completions/mean_terminated_length": 572.8745727539062,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 0.05597667638483965,
      "grad_norm": 0.1388639509677887,
      "learning_rate": 1e-06,
      "loss": -0.0016,
      "num_tokens": 3392871.0,
      "reward": 0.5022321939468384,
      "reward_std": 0.26367244124412537,
      "rewards/verify_math_reward/mean": 0.5022321343421936,
      "rewards/verify_math_reward/std": 0.5002743005752563,
      "step": 6
    },
    {
      "clip_ratio/high_max": 0.002521075904951431,
      "clip_ratio/high_mean": 0.0010430771653773263,
      "clip_ratio/low_mean": 0.000778143232309958,
      "clip_ratio/low_min": 2.4529329493816476e-05,
      "clip_ratio/region_mean": 0.0018212204086012207,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2758.0,
      "completions/mean_length": 563.1495971679688,
      "completions/mean_terminated_length": 515.1923217773438,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 0.0653061224489796,
      "grad_norm": 0.1458810567855835,
      "learning_rate": 1e-06,
      "loss": -0.0141,
      "num_tokens": 3946757.0,
      "reward": 0.543526828289032,
      "reward_std": 0.25032734870910645,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 7
    },
    {
      "clip_ratio/high_max": 0.0021492620653589256,
      "clip_ratio/high_mean": 0.000913873342142324,
      "clip_ratio/low_mean": 0.0006230521576071624,
      "clip_ratio/low_min": 8.898491614672821e-05,
      "clip_ratio/region_mean": 0.0015369254542747512,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3248.0,
      "completions/mean_length": 625.1741333007812,
      "completions/mean_terminated_length": 574.07470703125,
      "completions/min_length": 72.0,
      "completions/min_terminated_length": 72.0,
      "epoch": 0.07463556851311953,
      "grad_norm": 0.12061502039432526,
      "learning_rate": 1e-06,
      "loss": 0.0052,
      "num_tokens": 4528057.0,
      "reward": 0.5167410969734192,
      "reward_std": 0.23930947482585907,
      "rewards/verify_math_reward/mean": 0.5167410969734192,
      "rewards/verify_math_reward/std": 0.4999987483024597,
      "step": 8
    },
    {
      "clip_ratio/high_max": 0.002152189288608497,
      "clip_ratio/high_mean": 0.0009677911584731191,
      "clip_ratio/low_mean": 0.000675404578942107,
      "clip_ratio/low_min": 5.260602483758703e-05,
      "clip_ratio/region_mean": 0.0016431957192253321,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2164.0,
      "completions/mean_length": 657.6529541015625,
      "completions/mean_terminated_length": 571.1040649414062,
      "completions/min_length": 29.0,
      "completions/min_terminated_length": 29.0,
      "epoch": 0.08396501457725948,
      "grad_norm": 0.12534275650978088,
      "learning_rate": 1e-06,
      "loss": 0.0073,
      "num_tokens": 5116178.0,
      "reward": 0.5234375,
      "reward_std": 0.21925115585327148,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 9
    },
    {
      "clip_ratio/high_max": 0.0025328486663056538,
      "clip_ratio/high_mean": 0.0010626052535371855,
      "clip_ratio/low_mean": 0.0007675631386518944,
      "clip_ratio/low_min": 5.274859086057404e-05,
      "clip_ratio/region_mean": 0.0018301683667232282,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4041.0,
      "completions/mean_length": 626.6986694335938,
      "completions/mean_terminated_length": 575.6217041015625,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 0.09329446064139942,
      "grad_norm": 0.12725567817687988,
      "learning_rate": 1e-06,
      "loss": 0.0042,
      "num_tokens": 5719692.0,
      "reward": 0.5011160969734192,
      "reward_std": 0.24949504435062408,
      "rewards/verify_math_reward/mean": 0.5011160969734192,
      "rewards/verify_math_reward/std": 0.5002780556678772,
      "step": 10
    },
    {
      "clip_ratio/high_max": 0.0017157621405203827,
      "clip_ratio/high_mean": 0.000640581754851155,
      "clip_ratio/low_mean": 0.0005166652979369246,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011572470793907996,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3596.0,
      "completions/mean_length": 667.1060791015625,
      "completions/mean_terminated_length": 584.8125610351562,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 0.10262390670553936,
      "grad_norm": 0.10232733190059662,
      "learning_rate": 1e-06,
      "loss": 0.004,
      "num_tokens": 6315747.0,
      "reward": 0.4988839626312256,
      "reward_std": 0.17627158761024475,
      "rewards/verify_math_reward/mean": 0.4988839328289032,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 11
    },
    {
      "clip_ratio/high_max": 0.0024113980362017173,
      "clip_ratio/high_mean": 0.0008544203719793586,
      "clip_ratio/low_mean": 0.0004581312168738805,
      "clip_ratio/low_min": 1.3769552424491849e-05,
      "clip_ratio/region_mean": 0.001312551601586165,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2802.0,
      "completions/mean_length": 609.5145263671875,
      "completions/mean_terminated_length": 529.9143676757812,
      "completions/min_length": 15.0,
      "completions/min_terminated_length": 15.0,
      "epoch": 0.1119533527696793,
      "grad_norm": 0.11482333391904831,
      "learning_rate": 1e-06,
      "loss": 0.0026,
      "num_tokens": 6882432.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.18442019820213318,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 12
    },
    {
      "clip_ratio/high_max": 0.002209904996561818,
      "clip_ratio/high_mean": 0.0009161502002825728,
      "clip_ratio/low_mean": 0.0005432841262518195,
      "clip_ratio/low_min": 2.8014343115501106e-05,
      "clip_ratio/region_mean": 0.0014594343301723711,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2898.0,
      "completions/mean_length": 630.6217041015625,
      "completions/mean_terminated_length": 579.6024780273438,
      "completions/min_length": 57.0,
      "completions/min_terminated_length": 57.0,
      "epoch": 0.12128279883381925,
      "grad_norm": 0.11720063537359238,
      "learning_rate": 1e-06,
      "loss": 0.0062,
      "num_tokens": 7482213.0,
      "reward": 0.5625,
      "reward_std": 0.2191762924194336,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 13
    },
    {
      "clip_ratio/high_max": 0.002542078284022864,
      "clip_ratio/high_mean": 0.0010691071147448383,
      "clip_ratio/low_mean": 0.0006841068716312293,
      "clip_ratio/low_min": 2.551020406826865e-05,
      "clip_ratio/region_mean": 0.0017532139463583007,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3292.0,
      "completions/mean_length": 602.5569458007812,
      "completions/mean_terminated_length": 526.8722534179688,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 0.1306122448979592,
      "grad_norm": 0.13414473831653595,
      "learning_rate": 1e-06,
      "loss": 0.0037,
      "num_tokens": 8040392.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.24348580837249756,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.49702703952789307,
      "step": 14
    },
    {
      "clip_ratio/high_max": 0.0017708268351270817,
      "clip_ratio/high_mean": 0.0006339543433568906,
      "clip_ratio/low_mean": 0.0003903240321960766,
      "clip_ratio/low_min": 1.7149128325399943e-05,
      "clip_ratio/region_mean": 0.001024278361001052,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2646.0,
      "completions/mean_length": 640.546875,
      "completions/mean_terminated_length": 573.7178344726562,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 0.13994169096209913,
      "grad_norm": 0.10310615599155426,
      "learning_rate": 1e-06,
      "loss": -0.0012,
      "num_tokens": 8638154.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.15480685234069824,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 15
    },
    {
      "clip_ratio/high_max": 0.0022275920855463482,
      "clip_ratio/high_mean": 0.0008802472275419859,
      "clip_ratio/low_mean": 0.0005461570026454865,
      "clip_ratio/low_min": 3.0013235118531156e-05,
      "clip_ratio/region_mean": 0.0014264042183640413,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3389.0,
      "completions/mean_length": 637.6796875,
      "completions/mean_terminated_length": 574.8011474609375,
      "completions/min_length": 82.0,
      "completions/min_terminated_length": 82.0,
      "epoch": 0.14927113702623906,
      "grad_norm": 0.12171747535467148,
      "learning_rate": 1e-06,
      "loss": 0.0037,
      "num_tokens": 9235443.0,
      "reward": 0.559151828289032,
      "reward_std": 0.19707919657230377,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 16
    },
    {
      "clip_ratio/high_max": 0.002297037899552379,
      "clip_ratio/high_mean": 0.0009401563238498056,
      "clip_ratio/low_mean": 0.0006466075446951436,
      "clip_ratio/low_min": 3.81541540264152e-05,
      "clip_ratio/region_mean": 0.0015867638649069704,
      "completions/clipped_ratio": 0.0022321428571429047,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2212.0,
      "completions/mean_length": 565.9241333007812,
      "completions/mean_terminated_length": 558.02685546875,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 0.158600583090379,
      "grad_norm": 0.12709392607212067,
      "learning_rate": 1e-06,
      "loss": 0.0186,
      "num_tokens": 9819943.0,
      "reward": 0.5457589626312256,
      "reward_std": 0.24058938026428223,
      "rewards/verify_math_reward/mean": 0.5457589030265808,
      "rewards/verify_math_reward/std": 0.4981797933578491,
      "step": 17
    },
    {
      "clip_ratio/high_max": 0.0021680838872271124,
      "clip_ratio/high_mean": 0.0008981921473605325,
      "clip_ratio/low_mean": 0.0006542301634908654,
      "clip_ratio/low_min": 1.641281596675981e-05,
      "clip_ratio/region_mean": 0.0015524223053944297,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2533.0,
      "completions/mean_length": 634.5614013671875,
      "completions/mean_terminated_length": 575.6265869140625,
      "completions/min_length": 54.0,
      "completions/min_terminated_length": 54.0,
      "epoch": 0.16793002915451896,
      "grad_norm": 0.12446357309818268,
      "learning_rate": 1e-06,
      "loss": 0.0034,
      "num_tokens": 10408302.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.21899083256721497,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 18
    },
    {
      "clip_ratio/high_max": 0.00250030163442716,
      "clip_ratio/high_mean": 0.0011050563662138302,
      "clip_ratio/low_mean": 0.0007577011656394461,
      "clip_ratio/low_min": 3.5323871998116374e-05,
      "clip_ratio/region_mean": 0.0018627575118443929,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3468.0,
      "completions/mean_length": 600.9152221679688,
      "completions/mean_terminated_length": 549.4586791992188,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 0.1772594752186589,
      "grad_norm": 0.13770441710948944,
      "learning_rate": 1e-06,
      "loss": -0.0021,
      "num_tokens": 10987930.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.27343112230300903,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 19
    },
    {
      "clip_ratio/high_max": 0.0021293621357472148,
      "clip_ratio/high_mean": 0.001092827948014019,
      "clip_ratio/low_mean": 0.0006099227475715452,
      "clip_ratio/low_min": 4.898774204775691e-05,
      "clip_ratio/region_mean": 0.0017027506837621331,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3080.0,
      "completions/mean_length": 557.0658569335938,
      "completions/mean_terminated_length": 525.18359375,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 0.18658892128279883,
      "grad_norm": 0.13200710713863373,
      "learning_rate": 1e-06,
      "loss": 0.0017,
      "num_tokens": 11545461.0,
      "reward": 0.645089328289032,
      "reward_std": 0.22631458938121796,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 20
    },
    {
      "clip_ratio/high_max": 0.0019469308681436814,
      "clip_ratio/high_mean": 0.0007061947162583238,
      "clip_ratio/low_mean": 0.0005188142467886792,
      "clip_ratio/low_min": 3.999899490736425e-05,
      "clip_ratio/region_mean": 0.0012250089639564976,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3670.0,
      "completions/mean_length": 568.4888916015625,
      "completions/mean_terminated_length": 536.70947265625,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 0.19591836734693877,
      "grad_norm": 0.10863891988992691,
      "learning_rate": 1e-06,
      "loss": 0.0056,
      "num_tokens": 12104251.0,
      "reward": 0.59375,
      "reward_std": 0.17186929285526276,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 21
    },
    {
      "clip_ratio/high_max": 0.0017850339609140065,
      "clip_ratio/high_mean": 0.0007677656249143183,
      "clip_ratio/low_mean": 0.0005274809600450681,
      "clip_ratio/low_min": 1.4796401956118643e-05,
      "clip_ratio/region_mean": 0.0012952466131537221,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4037.0,
      "completions/mean_length": 654.8203125,
      "completions/mean_terminated_length": 596.23046875,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 0.20524781341107873,
      "grad_norm": 0.10651501268148422,
      "learning_rate": 1e-06,
      "loss": 0.0001,
      "num_tokens": 12724962.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.1947491466999054,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 22
    },
    {
      "clip_ratio/high_max": 0.0021366372529882938,
      "clip_ratio/high_mean": 0.0008756777497183066,
      "clip_ratio/low_mean": 0.0005200504274398554,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013957281553302892,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4041.0,
      "completions/mean_length": 655.4241333007812,
      "completions/mean_terminated_length": 564.7789306640625,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 0.21457725947521866,
      "grad_norm": 7.990764617919922,
      "learning_rate": 1e-06,
      "loss": 0.0095,
      "num_tokens": 13302350.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.1891171634197235,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 23
    },
    {
      "clip_ratio/high_max": 0.0021235865060589276,
      "clip_ratio/high_mean": 0.000820696081063943,
      "clip_ratio/low_mean": 0.0004587074627124821,
      "clip_ratio/low_min": 1.581077594892122e-05,
      "clip_ratio/region_mean": 0.0012794035465049092,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3345.0,
      "completions/mean_length": 644.8783569335938,
      "completions/mean_terminated_length": 590.0986328125,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 0.2239067055393586,
      "grad_norm": 0.11133348941802979,
      "learning_rate": 1e-06,
      "loss": 0.0114,
      "num_tokens": 13907113.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.19643910229206085,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 24
    },
    {
      "clip_ratio/high_max": 0.0021659615376847796,
      "clip_ratio/high_mean": 0.0007778075250826078,
      "clip_ratio/low_mean": 0.0004985564892194816,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012763640042976476,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3484.0,
      "completions/mean_length": 669.247802734375,
      "completions/mean_terminated_length": 610.903564453125,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 0.23323615160349853,
      "grad_norm": 0.1110498383641243,
      "learning_rate": 1e-06,
      "loss": 0.007,
      "num_tokens": 14530551.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.18032734096050262,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 25
    },
    {
      "clip_ratio/high_max": 0.0020686442585429177,
      "clip_ratio/high_mean": 0.0008784457386354916,
      "clip_ratio/low_mean": 0.0005849237613801961,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014633694845542777,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2760.0,
      "completions/mean_length": 618.2890625,
      "completions/mean_terminated_length": 559.0772094726562,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 0.2425655976676385,
      "grad_norm": 0.12153169512748718,
      "learning_rate": 1e-06,
      "loss": 0.0067,
      "num_tokens": 15110570.0,
      "reward": 0.512276828289032,
      "reward_std": 0.21564117074012756,
      "rewards/verify_math_reward/mean": 0.5122767686843872,
      "rewards/verify_math_reward/std": 0.500128448009491,
      "step": 26
    },
    {
      "clip_ratio/high_max": 0.002358836092753336,
      "clip_ratio/high_mean": 0.0010083036395371892,
      "clip_ratio/low_mean": 0.0005340912011888577,
      "clip_ratio/low_min": 4.825765336136101e-05,
      "clip_ratio/region_mean": 0.0015423948170791846,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2541.0,
      "completions/mean_length": 587.0201416015625,
      "completions/mean_terminated_length": 547.4153442382812,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "epoch": 0.2518950437317784,
      "grad_norm": 0.12810222804546356,
      "learning_rate": 1e-06,
      "loss": 0.0079,
      "num_tokens": 15683700.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.2073742300271988,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 27
    },
    {
      "clip_ratio/high_max": 0.0020375716740090866,
      "clip_ratio/high_mean": 0.0008536390269000549,
      "clip_ratio/low_mean": 0.0005751541275458294,
      "clip_ratio/low_min": 6.23652895228588e-05,
      "clip_ratio/region_mean": 0.001428793155355379,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4090.0,
      "completions/mean_length": 590.7042846679688,
      "completions/mean_terminated_length": 543.12109375,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 0.2612244897959184,
      "grad_norm": 0.1335379183292389,
      "learning_rate": 1e-06,
      "loss": 0.0076,
      "num_tokens": 16253811.0,
      "reward": 0.5546875,
      "reward_std": 0.21373297274112701,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 28
    },
    {
      "clip_ratio/high_max": 0.002163997258321615,
      "clip_ratio/high_mean": 0.0008853104918671306,
      "clip_ratio/low_mean": 0.0006560976398759522,
      "clip_ratio/low_min": 4.884715690423036e-05,
      "clip_ratio/region_mean": 0.001541408164484892,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2977.0,
      "completions/mean_length": 660.6160888671875,
      "completions/mean_terminated_length": 590.1868286132812,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 0.2705539358600583,
      "grad_norm": 0.12447265535593033,
      "learning_rate": 1e-06,
      "loss": -0.0152,
      "num_tokens": 16860979.0,
      "reward": 0.5055803656578064,
      "reward_std": 0.21511800587177277,
      "rewards/verify_math_reward/mean": 0.5055803656578064,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 29
    },
    {
      "clip_ratio/high_max": 0.0018319295995752327,
      "clip_ratio/high_mean": 0.000857271057611797,
      "clip_ratio/low_mean": 0.0006075464270907105,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00146481746196514,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2588.0,
      "completions/mean_length": 587.84375,
      "completions/mean_terminated_length": 540.2217407226562,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 0.27988338192419826,
      "grad_norm": 0.12270282208919525,
      "learning_rate": 1e-06,
      "loss": -0.0004,
      "num_tokens": 17433863.0,
      "reward": 0.5703125,
      "reward_std": 0.21109367907047272,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 30
    },
    {
      "clip_ratio/high_max": 0.0020136640159762464,
      "clip_ratio/high_mean": 0.0009078333459910937,
      "clip_ratio/low_mean": 0.0005748930125264451,
      "clip_ratio/low_min": 4.280285611457657e-05,
      "clip_ratio/region_mean": 0.0014827263585175388,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4096.0,
      "completions/mean_length": 655.3795166015625,
      "completions/mean_terminated_length": 616.5462646484375,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 0.2892128279883382,
      "grad_norm": 0.11932247877120972,
      "learning_rate": 1e-06,
      "loss": -0.0032,
      "num_tokens": 18065003.0,
      "reward": 0.5457589626312256,
      "reward_std": 0.23420287668704987,
      "rewards/verify_math_reward/mean": 0.5457589030265808,
      "rewards/verify_math_reward/std": 0.4981797933578491,
      "step": 31
    },
    {
      "clip_ratio/high_max": 0.002112112160830293,
      "clip_ratio/high_mean": 0.000947833490499761,
      "clip_ratio/low_mean": 0.0008170058918040013,
      "clip_ratio/low_min": 4.710533085017232e-05,
      "clip_ratio/region_mean": 0.0017648393550189212,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3921.0,
      "completions/mean_length": 599.8381958007812,
      "completions/mean_terminated_length": 552.3789672851562,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 0.29854227405247813,
      "grad_norm": 0.13691404461860657,
      "learning_rate": 1e-06,
      "loss": -0.0,
      "num_tokens": 18644146.0,
      "reward": 0.5625,
      "reward_std": 0.24716567993164062,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 32
    },
    {
      "clip_ratio/high_max": 0.002308595256181434,
      "clip_ratio/high_mean": 0.0008604364102211548,
      "clip_ratio/low_mean": 0.0007807903384673409,
      "clip_ratio/low_min": 7.305674625968095e-05,
      "clip_ratio/region_mean": 0.0016412267432315275,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3889.0,
      "completions/mean_length": 547.2835083007812,
      "completions/mean_terminated_length": 527.3692626953125,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 0.30787172011661806,
      "grad_norm": 0.1319669634103775,
      "learning_rate": 1e-06,
      "loss": 0.0229,
      "num_tokens": 19200568.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.23101183772087097,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 33
    },
    {
      "clip_ratio/high_max": 0.0020851020599366166,
      "clip_ratio/high_mean": 0.0008397549772780621,
      "clip_ratio/low_mean": 0.0007184331880125683,
      "clip_ratio/low_min": 1.4137072867015377e-05,
      "clip_ratio/region_mean": 0.0015581881743855774,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3786.0,
      "completions/mean_length": 639.59375,
      "completions/mean_terminated_length": 572.7462768554688,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 0.317201166180758,
      "grad_norm": 0.1240430399775505,
      "learning_rate": 1e-06,
      "loss": -0.0119,
      "num_tokens": 19787404.0,
      "reward": 0.546875,
      "reward_std": 0.21538084745407104,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 34
    },
    {
      "clip_ratio/high_max": 0.0017645746302150656,
      "clip_ratio/high_mean": 0.0007314048471016577,
      "clip_ratio/low_mean": 0.0006384962380252546,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013699011033168063,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3674.0,
      "completions/mean_length": 615.6574096679688,
      "completions/mean_terminated_length": 572.3988647460938,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 0.32653061224489793,
      "grad_norm": 0.12025720626115799,
      "learning_rate": 1e-06,
      "loss": 0.0258,
      "num_tokens": 20377633.0,
      "reward": 0.59375,
      "reward_std": 0.1985117793083191,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 35
    },
    {
      "clip_ratio/high_max": 0.0021113943730597384,
      "clip_ratio/high_mean": 0.0008957727495726431,
      "clip_ratio/low_mean": 0.0006973784638830693,
      "clip_ratio/low_min": 2.8803593522752635e-05,
      "clip_ratio/region_mean": 0.0015931512170936912,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3386.0,
      "completions/mean_length": 646.8560791015625,
      "completions/mean_terminated_length": 592.1077270507812,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 0.3358600583090379,
      "grad_norm": 0.11791989207267761,
      "learning_rate": 1e-06,
      "loss": 0.0068,
      "num_tokens": 20992696.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.22074860334396362,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 36
    },
    {
      "clip_ratio/high_max": 0.0023939193997648545,
      "clip_ratio/high_mean": 0.0010083259876410011,
      "clip_ratio/low_mean": 0.0005839945970365079,
      "clip_ratio/low_min": 1.840942604758311e-05,
      "clip_ratio/region_mean": 0.0015923206083243713,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3996.0,
      "completions/mean_length": 611.78125,
      "completions/mean_terminated_length": 548.4318237304688,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 0.34518950437317786,
      "grad_norm": 0.12318096309900284,
      "learning_rate": 1e-06,
      "loss": 0.01,
      "num_tokens": 21552044.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.20275256037712097,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973994791507721,
      "step": 37
    },
    {
      "clip_ratio/high_max": 0.0021015226702729706,
      "clip_ratio/high_mean": 0.0008463711510557914,
      "clip_ratio/low_mean": 0.0005464616351673612,
      "clip_ratio/low_min": 4.9030876652977895e-05,
      "clip_ratio/region_mean": 0.001392832778947195,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2605.0,
      "completions/mean_length": 620.0279541015625,
      "completions/mean_terminated_length": 560.8456420898438,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 0.3545189504373178,
      "grad_norm": 0.11765076965093613,
      "learning_rate": 1e-06,
      "loss": -0.0079,
      "num_tokens": 22135485.0,
      "reward": 0.543526828289032,
      "reward_std": 0.20069055259227753,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 38
    },
    {
      "clip_ratio/high_max": 0.0019079652411164716,
      "clip_ratio/high_mean": 0.000800685964350123,
      "clip_ratio/low_mean": 0.0005495621308000409,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013502480833267327,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2939.0,
      "completions/mean_length": 594.443115234375,
      "completions/mean_terminated_length": 550.9208984375,
      "completions/min_length": 44.0,
      "completions/min_terminated_length": 44.0,
      "epoch": 0.3638483965014577,
      "grad_norm": 0.12217120081186295,
      "learning_rate": 1e-06,
      "loss": 0.0003,
      "num_tokens": 22718914.0,
      "reward": 0.53125,
      "reward_std": 0.21079127490520477,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 39
    },
    {
      "clip_ratio/high_max": 0.0018332536146772327,
      "clip_ratio/high_mean": 0.0008255630032181216,
      "clip_ratio/low_mean": 0.0006326256452666712,
      "clip_ratio/low_min": 5.133819468028378e-05,
      "clip_ratio/region_mean": 0.0014581886134692468,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2932.0,
      "completions/mean_length": 604.0971069335938,
      "completions/mean_terminated_length": 556.6957397460938,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 0.37317784256559766,
      "grad_norm": 0.11948627978563309,
      "learning_rate": 1e-06,
      "loss": -0.0027,
      "num_tokens": 23313385.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.2077540010213852,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 40
    },
    {
      "clip_ratio/high_max": 0.0022066068850108422,
      "clip_ratio/high_mean": 0.0007734388218523236,
      "clip_ratio/low_mean": 0.0008182028213923331,
      "clip_ratio/low_min": 9.753470476425719e-05,
      "clip_ratio/region_mean": 0.0015916416450636461,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3936.0,
      "completions/mean_length": 679.4342041015625,
      "completions/mean_terminated_length": 609.3906860351562,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 0.3825072886297376,
      "grad_norm": 0.11911804974079132,
      "learning_rate": 1e-06,
      "loss": 0.0065,
      "num_tokens": 23950214.0,
      "reward": 0.5,
      "reward_std": 0.21640115976333618,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5002792477607727,
      "step": 41
    },
    {
      "clip_ratio/high_max": 0.001551356423078687,
      "clip_ratio/high_mean": 0.0005771869509771932,
      "clip_ratio/low_mean": 0.00043534704309422523,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010125339940714184,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3311.0,
      "completions/mean_length": 589.7332763671875,
      "completions/mean_terminated_length": 554.1566772460938,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 0.39183673469387753,
      "grad_norm": 0.1045951321721077,
      "learning_rate": 1e-06,
      "loss": -0.0094,
      "num_tokens": 24532951.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.155109241604805,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943602085113525,
      "step": 42
    },
    {
      "clip_ratio/high_max": 0.0019523338414728642,
      "clip_ratio/high_mean": 0.0007970899187057512,
      "clip_ratio/low_mean": 0.0004689718434747192,
      "clip_ratio/low_min": 2.1147014194866642e-05,
      "clip_ratio/region_mean": 0.0012660617976507638,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3754.0,
      "completions/mean_length": 601.4252319335938,
      "completions/mean_terminated_length": 557.9898681640625,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 0.40116618075801747,
      "grad_norm": 0.12115523964166641,
      "learning_rate": 1e-06,
      "loss": 0.0073,
      "num_tokens": 25112908.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.19962720572948456,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448275566101074,
      "step": 43
    },
    {
      "clip_ratio/high_max": 0.00213483496918343,
      "clip_ratio/high_mean": 0.0008525035227648914,
      "clip_ratio/low_mean": 0.0005053669019616791,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013578704274550546,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1779.0,
      "completions/mean_length": 538.3817138671875,
      "completions/mean_terminated_length": 498.2279968261719,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 0.41049562682215746,
      "grad_norm": 0.1257464438676834,
      "learning_rate": 1e-06,
      "loss": 0.0014,
      "num_tokens": 25640530.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.1928277462720871,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 44
    },
    {
      "clip_ratio/high_max": 0.0021022484070272185,
      "clip_ratio/high_mean": 0.0008700824600964552,
      "clip_ratio/low_mean": 0.0005214876373429433,
      "clip_ratio/low_min": 2.5895944418152794e-05,
      "clip_ratio/region_mean": 0.0013915700983488932,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2456.0,
      "completions/mean_length": 639.310302734375,
      "completions/mean_terminated_length": 576.4613647460938,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 0.4198250728862974,
      "grad_norm": 0.11802743375301361,
      "learning_rate": 1e-06,
      "loss": 0.0146,
      "num_tokens": 26241208.0,
      "reward": 0.578125,
      "reward_std": 0.20256711542606354,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 45
    },
    {
      "clip_ratio/high_max": 0.0019054479635087773,
      "clip_ratio/high_mean": 0.0008108138445095392,
      "clip_ratio/low_mean": 0.0006439466033043573,
      "clip_ratio/low_min": 3.09958895741147e-05,
      "clip_ratio/region_mean": 0.0014547604660037905,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2816.0,
      "completions/mean_length": 676.0971069335938,
      "completions/mean_terminated_length": 613.9170532226562,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 0.4291545189504373,
      "grad_norm": 0.11587009578943253,
      "learning_rate": 1e-06,
      "loss": -0.0007,
      "num_tokens": 26869903.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.22180238366127014,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 46
    },
    {
      "clip_ratio/high_max": 0.001860423286416335,
      "clip_ratio/high_mean": 0.000731741974959732,
      "clip_ratio/low_mean": 0.0005995130404699012,
      "clip_ratio/low_min": 3.8435670830949675e-05,
      "clip_ratio/region_mean": 0.001331254985416308,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2903.0,
      "completions/mean_length": 643.1373291015625,
      "completions/mean_terminated_length": 592.3023681640625,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 0.43848396501457726,
      "grad_norm": 0.11640927940607071,
      "learning_rate": 1e-06,
      "loss": 0.0099,
      "num_tokens": 27477802.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.1954626888036728,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 47
    },
    {
      "clip_ratio/high_max": 0.0017458675538364332,
      "clip_ratio/high_mean": 0.0006616584932999103,
      "clip_ratio/low_mean": 0.0005689935733244056,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012306520802667364,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4025.0,
      "completions/mean_length": 660.2277221679688,
      "completions/mean_terminated_length": 557.5494384765625,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 0.4478134110787172,
      "grad_norm": 0.12104179710149765,
      "learning_rate": 1e-06,
      "loss": -0.0029,
      "num_tokens": 28057518.0,
      "reward": 0.535714328289032,
      "reward_std": 0.17634011805057526,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 48
    },
    {
      "clip_ratio/high_max": 0.002319449369679205,
      "clip_ratio/high_mean": 0.0009525951099931262,
      "clip_ratio/low_mean": 0.000717777469617431,
      "clip_ratio/low_min": 1.9379844161449e-05,
      "clip_ratio/region_mean": 0.0016703725850675255,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1987.0,
      "completions/mean_length": 606.1082763671875,
      "completions/mean_terminated_length": 558.7341918945312,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 0.45714285714285713,
      "grad_norm": 0.12610645592212677,
      "learning_rate": 1e-06,
      "loss": -0.0017,
      "num_tokens": 28634455.0,
      "reward": 0.515625,
      "reward_std": 0.2249245047569275,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 49
    },
    {
      "clip_ratio/high_max": 0.0022506101013277657,
      "clip_ratio/high_mean": 0.0008784776182437781,
      "clip_ratio/low_mean": 0.0006992462167545455,
      "clip_ratio/low_min": 2.485473578417441e-05,
      "clip_ratio/region_mean": 0.0015777238113514613,
      "completions/clipped_ratio": 0.0323660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3814.0,
      "completions/mean_length": 663.341552734375,
      "completions/mean_terminated_length": 548.5236206054688,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 0.46647230320699706,
      "grad_norm": 0.13359029591083527,
      "learning_rate": 1e-06,
      "loss": 0.0038,
      "num_tokens": 29212561.0,
      "reward": 0.5234375,
      "reward_std": 0.22292782366275787,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 50
    },
    {
      "clip_ratio/high_max": 0.002292481505719479,
      "clip_ratio/high_mean": 0.0009920787779265083,
      "clip_ratio/low_mean": 0.0006807221616327297,
      "clip_ratio/low_min": 2.8170768928248435e-05,
      "clip_ratio/region_mean": 0.0016728009868529625,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3475.0,
      "completions/mean_length": 657.482177734375,
      "completions/mean_terminated_length": 562.843994140625,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 0.47580174927113705,
      "grad_norm": 0.13377641141414642,
      "learning_rate": 1e-06,
      "loss": 0.0075,
      "num_tokens": 29784569.0,
      "reward": 0.590401828289032,
      "reward_std": 0.2197408229112625,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 51
    },
    {
      "clip_ratio/high_max": 0.002113297443429474,
      "clip_ratio/high_mean": 0.0008390244620386511,
      "clip_ratio/low_mean": 0.0005232069088378921,
      "clip_ratio/low_min": 4.2957304685842246e-05,
      "clip_ratio/region_mean": 0.001362231374514522,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3689.0,
      "completions/mean_length": 604.1261596679688,
      "completions/mean_terminated_length": 536.5927124023438,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 0.485131195335277,
      "grad_norm": 0.12396257370710373,
      "learning_rate": 1e-06,
      "loss": 0.0099,
      "num_tokens": 30335922.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.1970124989748001,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 52
    },
    {
      "clip_ratio/high_max": 0.0019683467708091484,
      "clip_ratio/high_mean": 0.0008121754190142383,
      "clip_ratio/low_mean": 0.0005306686509811698,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001342844061582582,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2558.0,
      "completions/mean_length": 594.0513916015625,
      "completions/mean_terminated_length": 514.09814453125,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 0.4944606413994169,
      "grad_norm": 0.11551974713802338,
      "learning_rate": 1e-06,
      "loss": 0.0059,
      "num_tokens": 30872024.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.18309611082077026,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 53
    },
    {
      "clip_ratio/high_max": 0.0018420130618324038,
      "clip_ratio/high_mean": 0.0008389095346501563,
      "clip_ratio/low_mean": 0.0005182864852031344,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013571960516856052,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2842.0,
      "completions/mean_length": 557.5011596679688,
      "completions/mean_terminated_length": 489.06597900390625,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 0.5037900874635568,
      "grad_norm": 0.1274426281452179,
      "learning_rate": 1e-06,
      "loss": 0.0165,
      "num_tokens": 31391753.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.19050613045692444,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161845445632935,
      "step": 54
    },
    {
      "clip_ratio/high_max": 0.0024072310770861804,
      "clip_ratio/high_mean": 0.0009133650200965349,
      "clip_ratio/low_mean": 0.0006398809582606191,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001553245987452101,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1989.0,
      "completions/mean_length": 547.6495971679688,
      "completions/mean_terminated_length": 507.6004638671875,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 0.5131195335276968,
      "grad_norm": 0.13147826492786407,
      "learning_rate": 1e-06,
      "loss": 0.0052,
      "num_tokens": 31935879.0,
      "reward": 0.598214328289032,
      "reward_std": 0.202678382396698,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 55
    },
    {
      "clip_ratio/high_max": 0.002112068636051845,
      "clip_ratio/high_mean": 0.0007095787332218606,
      "clip_ratio/low_mean": 0.0006021747067279648,
      "clip_ratio/low_min": 3.432239009271143e-05,
      "clip_ratio/region_mean": 0.0013117534545017406,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2280.0,
      "completions/mean_length": 602.078125,
      "completions/mean_terminated_length": 546.6190795898438,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 0.5224489795918368,
      "grad_norm": 0.12339483201503754,
      "learning_rate": 1e-06,
      "loss": -0.0018,
      "num_tokens": 32509405.0,
      "reward": 0.527901828289032,
      "reward_std": 0.19297927618026733,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 56
    },
    {
      "clip_ratio/high_max": 0.002014315490669105,
      "clip_ratio/high_mean": 0.000809934706921922,
      "clip_ratio/low_mean": 0.0005260051398181531,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013359398253669497,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2168.0,
      "completions/mean_length": 570.6495971679688,
      "completions/mean_terminated_length": 506.55224609375,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 0.5317784256559767,
      "grad_norm": 0.12417443841695786,
      "learning_rate": 1e-06,
      "loss": 0.0103,
      "num_tokens": 33041923.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.18193607032299042,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 57
    },
    {
      "clip_ratio/high_max": 0.0021554861014010385,
      "clip_ratio/high_mean": 0.0008511697506037308,
      "clip_ratio/low_mean": 0.0005805591317766812,
      "clip_ratio/low_min": 3.1049397875904106e-05,
      "clip_ratio/region_mean": 0.0014317288696474861,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2829.0,
      "completions/mean_length": 604.8136596679688,
      "completions/mean_terminated_length": 537.2935180664062,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 0.5411078717201167,
      "grad_norm": 0.12504227459430695,
      "learning_rate": 1e-06,
      "loss": 0.016,
      "num_tokens": 33604068.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.222071573138237,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 58
    },
    {
      "clip_ratio/high_max": 0.0017629893991397694,
      "clip_ratio/high_mean": 0.0006641444506385596,
      "clip_ratio/low_mean": 0.0005931061241426505,
      "clip_ratio/low_min": 4.5553937525255606e-05,
      "clip_ratio/region_mean": 0.0012572505911521148,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4023.0,
      "completions/mean_length": 594.6607666015625,
      "completions/mean_terminated_length": 531.0,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 0.5504373177842565,
      "grad_norm": 0.16546018421649933,
      "learning_rate": 1e-06,
      "loss": -0.0022,
      "num_tokens": 34165492.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.15947312116622925,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 59
    },
    {
      "clip_ratio/high_max": 0.0016925646559684537,
      "clip_ratio/high_mean": 0.000671632211378892,
      "clip_ratio/low_mean": 0.000527624249116343,
      "clip_ratio/low_min": 1.2502500794653315e-05,
      "clip_ratio/region_mean": 0.0011992564395768568,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2217.0,
      "completions/mean_length": 555.5413208007812,
      "completions/mean_terminated_length": 519.6177978515625,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 0.5597667638483965,
      "grad_norm": 0.821980893611908,
      "learning_rate": 1e-06,
      "loss": -0.0153,
      "num_tokens": 34711401.0,
      "reward": 0.5770089626312256,
      "reward_std": 0.15878772735595703,
      "rewards/verify_math_reward/mean": 0.5770089030265808,
      "rewards/verify_math_reward/std": 0.4943099319934845,
      "step": 60
    },
    {
      "clip_ratio/high_max": 0.002033336590102408,
      "clip_ratio/high_mean": 0.0007129043224267662,
      "clip_ratio/low_mean": 0.0005907068980377517,
      "clip_ratio/low_min": 1.575299393152818e-05,
      "clip_ratio/region_mean": 0.0013036112177360337,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3533.0,
      "completions/mean_length": 670.654052734375,
      "completions/mean_terminated_length": 576.37841796875,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 0.5690962099125364,
      "grad_norm": 0.11143632978200912,
      "learning_rate": 1e-06,
      "loss": -0.0013,
      "num_tokens": 35298027.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.19756564497947693,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756911277771,
      "step": 61
    },
    {
      "clip_ratio/high_max": 0.002405178027402144,
      "clip_ratio/high_mean": 0.001002484841592377,
      "clip_ratio/low_mean": 0.0007505635776396957,
      "clip_ratio/low_min": 6.556402513524517e-05,
      "clip_ratio/region_mean": 0.0017530484037706628,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3813.0,
      "completions/mean_length": 652.1495971679688,
      "completions/mean_terminated_length": 589.5340576171875,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 0.5784256559766764,
      "grad_norm": 0.13426145911216736,
      "learning_rate": 1e-06,
      "loss": 0.0179,
      "num_tokens": 35900785.0,
      "reward": 0.515625,
      "reward_std": 0.2607671618461609,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 62
    },
    {
      "clip_ratio/high_max": 0.002157864728360437,
      "clip_ratio/high_mean": 0.0008281393202196341,
      "clip_ratio/low_mean": 0.0005194035911699757,
      "clip_ratio/low_min": 1.8590124454931356e-05,
      "clip_ratio/region_mean": 0.0013475428931997158,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2297.0,
      "completions/mean_length": 580.6674194335938,
      "completions/mean_terminated_length": 528.9127807617188,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 0.5877551020408164,
      "grad_norm": 0.12360631674528122,
      "learning_rate": 1e-06,
      "loss": -0.0046,
      "num_tokens": 36446471.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.20023591816425323,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 63
    },
    {
      "clip_ratio/high_max": 0.0023264320770977065,
      "clip_ratio/high_mean": 0.0009458195745537523,
      "clip_ratio/low_mean": 0.0006299283477346762,
      "clip_ratio/low_min": 1.771792994986754e-05,
      "clip_ratio/region_mean": 0.0015757478977320716,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3715.0,
      "completions/mean_length": 632.6730346679688,
      "completions/mean_terminated_length": 553.6015625,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 0.5970845481049563,
      "grad_norm": 0.13188877701759338,
      "learning_rate": 1e-06,
      "loss": -0.0005,
      "num_tokens": 37021194.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.20310094952583313,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 64
    },
    {
      "clip_ratio/high_max": 0.001885223107819911,
      "clip_ratio/high_mean": 0.0006958433959880495,
      "clip_ratio/low_mean": 0.0005695299187209457,
      "clip_ratio/low_min": 6.910875708854292e-05,
      "clip_ratio/region_mean": 0.001265373342903331,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3799.0,
      "completions/mean_length": 609.9017944335938,
      "completions/mean_terminated_length": 570.5552978515625,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 0.6064139941690962,
      "grad_norm": 0.12055953592061996,
      "learning_rate": 1e-06,
      "loss": 0.0326,
      "num_tokens": 37624386.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.17585155367851257,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 65
    },
    {
      "clip_ratio/high_max": 0.0020190873547107913,
      "clip_ratio/high_mean": 0.0008522569078195374,
      "clip_ratio/low_mean": 0.0006081984229240334,
      "clip_ratio/low_min": 3.8328054870362394e-05,
      "clip_ratio/region_mean": 0.0014604553034587298,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3076.0,
      "completions/mean_length": 584.3125,
      "completions/mean_terminated_length": 524.5221557617188,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 0.6157434402332361,
      "grad_norm": 0.12680187821388245,
      "learning_rate": 1e-06,
      "loss": 0.0162,
      "num_tokens": 38185026.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.2031630128622055,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 66
    },
    {
      "clip_ratio/high_max": 0.0018833797148545273,
      "clip_ratio/high_mean": 0.0007615654922119575,
      "clip_ratio/low_mean": 0.0005263847488095053,
      "clip_ratio/low_min": 1.594794593984261e-05,
      "clip_ratio/region_mean": 0.0012879502428404521,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3643.0,
      "completions/mean_length": 676.0111694335938,
      "completions/mean_terminated_length": 617.7821044921875,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 0.6250728862973761,
      "grad_norm": 0.10803545266389847,
      "learning_rate": 1e-06,
      "loss": -0.0159,
      "num_tokens": 38824964.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.1932484656572342,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 67
    },
    {
      "clip_ratio/high_max": 0.002362093233386986,
      "clip_ratio/high_mean": 0.00093513973752124,
      "clip_ratio/low_mean": 0.0006717547585139982,
      "clip_ratio/low_min": 8.071445336099714e-05,
      "clip_ratio/region_mean": 0.001606894515134627,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2535.0,
      "completions/mean_length": 593.169677734375,
      "completions/mean_terminated_length": 545.6199340820312,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 0.634402332361516,
      "grad_norm": 0.12663999199867249,
      "learning_rate": 1e-06,
      "loss": 0.014,
      "num_tokens": 39390916.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.19655926525592804,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689781665802,
      "step": 68
    },
    {
      "clip_ratio/high_max": 0.0027452273498056456,
      "clip_ratio/high_mean": 0.0010903024740400724,
      "clip_ratio/low_mean": 0.0006977858447498875,
      "clip_ratio/low_min": 2.55620425377856e-05,
      "clip_ratio/region_mean": 0.0017880883387988433,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3469.0,
      "completions/mean_length": 686.4498291015625,
      "completions/mean_terminated_length": 592.6089477539062,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 0.643731778425656,
      "grad_norm": 0.13761483132839203,
      "learning_rate": 1e-06,
      "loss": 0.0003,
      "num_tokens": 39998039.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.2650946080684662,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689781665802,
      "step": 69
    },
    {
      "clip_ratio/high_max": 0.0020036659843754023,
      "clip_ratio/high_mean": 0.0008543112780898809,
      "clip_ratio/low_mean": 0.0006879072552692378,
      "clip_ratio/low_min": 3.6289940908318385e-05,
      "clip_ratio/region_mean": 0.0015422185533680022,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3380.0,
      "completions/mean_length": 638.3660888671875,
      "completions/mean_terminated_length": 579.4960327148438,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 0.6530612244897959,
      "grad_norm": 0.1263047307729721,
      "learning_rate": 1e-06,
      "loss": 0.0004,
      "num_tokens": 40598615.0,
      "reward": 0.543526828289032,
      "reward_std": 0.22634737193584442,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838003516197205,
      "step": 70
    },
    {
      "clip_ratio/high_max": 0.0018194154508819338,
      "clip_ratio/high_mean": 0.0006804778440709924,
      "clip_ratio/low_mean": 0.0007111599461495643,
      "clip_ratio/low_min": 6.525953904201742e-05,
      "clip_ratio/region_mean": 0.0013916377938585356,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3771.0,
      "completions/mean_length": 594.4263916015625,
      "completions/mean_terminated_length": 550.9039916992188,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 0.6623906705539359,
      "grad_norm": 0.12367860972881317,
      "learning_rate": 1e-06,
      "loss": 0.0003,
      "num_tokens": 41185749.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.19320890307426453,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 71
    },
    {
      "clip_ratio/high_max": 0.00217145054921275,
      "clip_ratio/high_mean": 0.0007920087136881193,
      "clip_ratio/low_mean": 0.0006000880857754964,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001392096768540796,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3970.0,
      "completions/mean_length": 614.833740234375,
      "completions/mean_terminated_length": 563.5820922851562,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 0.6717201166180758,
      "grad_norm": 0.1266845017671585,
      "learning_rate": 1e-06,
      "loss": 0.0054,
      "num_tokens": 41780832.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.19486747682094574,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 72
    },
    {
      "clip_ratio/high_max": 0.0018032259722531307,
      "clip_ratio/high_mean": 0.0006822762316005537,
      "clip_ratio/low_mean": 0.0005624001751129981,
      "clip_ratio/low_min": 3.8722011595382355e-05,
      "clip_ratio/region_mean": 0.0012446763867046684,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3378.0,
      "completions/mean_length": 649.0892944335938,
      "completions/mean_terminated_length": 570.3927001953125,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 0.6810495626822157,
      "grad_norm": 0.11287780106067657,
      "learning_rate": 1e-06,
      "loss": -0.0177,
      "num_tokens": 42377272.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.1748398393392563,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 73
    },
    {
      "clip_ratio/high_max": 0.0022270442932494916,
      "clip_ratio/high_mean": 0.000858560995766311,
      "clip_ratio/low_mean": 0.000592833250266267,
      "clip_ratio/low_min": 3.8368839341274e-05,
      "clip_ratio/region_mean": 0.001451394236937631,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3249.0,
      "completions/mean_length": 677.068115234375,
      "completions/mean_terminated_length": 591.0079956054688,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 0.6903790087463557,
      "grad_norm": 0.12197376787662506,
      "learning_rate": 1e-06,
      "loss": -0.0017,
      "num_tokens": 42983517.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.20771123468875885,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 74
    },
    {
      "clip_ratio/high_max": 0.0016667769705236424,
      "clip_ratio/high_mean": 0.0006762091416021576,
      "clip_ratio/low_mean": 0.0005556680225708988,
      "clip_ratio/low_min": 3.057032972719753e-05,
      "clip_ratio/region_mean": 0.0012318771368882153,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4047.0,
      "completions/mean_length": 650.880615234375,
      "completions/mean_terminated_length": 596.1961669921875,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 0.6997084548104956,
      "grad_norm": 0.11935737729072571,
      "learning_rate": 1e-06,
      "loss": 0.0118,
      "num_tokens": 43594426.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.18881477415561676,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 75
    },
    {
      "clip_ratio/high_max": 0.0020895929555990733,
      "clip_ratio/high_mean": 0.0009295289673900697,
      "clip_ratio/low_mean": 0.0007338959840126336,
      "clip_ratio/low_min": 8.660398543725023e-05,
      "clip_ratio/region_mean": 0.0016634249477647245,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3594.0,
      "completions/mean_length": 680.5892944335938,
      "completions/mean_terminated_length": 602.6118774414062,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 0.7090379008746356,
      "grad_norm": 0.13902558386325836,
      "learning_rate": 1e-06,
      "loss": 0.0072,
      "num_tokens": 44215554.0,
      "reward": 0.5234375,
      "reward_std": 0.2520551383495331,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 76
    },
    {
      "clip_ratio/high_max": 0.0023232634193846025,
      "clip_ratio/high_mean": 0.0009882602316793054,
      "clip_ratio/low_mean": 0.0006640208102908218,
      "clip_ratio/low_min": 6.0430887060647365e-05,
      "clip_ratio/region_mean": 0.001652281036513159,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2344.0,
      "completions/mean_length": 647.7913208007812,
      "completions/mean_terminated_length": 548.818603515625,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 0.7183673469387755,
      "grad_norm": 0.13488072156906128,
      "learning_rate": 1e-06,
      "loss": -0.0077,
      "num_tokens": 44785199.0,
      "reward": 0.5625,
      "reward_std": 0.2246202826499939,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 77
    },
    {
      "clip_ratio/high_max": 0.002175509653170593,
      "clip_ratio/high_mean": 0.0008506925369147211,
      "clip_ratio/low_mean": 0.0005729131144107669,
      "clip_ratio/low_min": 1.6382700778194703e-05,
      "clip_ratio/region_mean": 0.001423605666786898,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3663.0,
      "completions/mean_length": 616.036865234375,
      "completions/mean_terminated_length": 556.78662109375,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 0.7276967930029155,
      "grad_norm": 0.1300690472126007,
      "learning_rate": 1e-06,
      "loss": 0.007,
      "num_tokens": 45361640.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.1929485946893692,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.49075525999069214,
      "step": 78
    },
    {
      "clip_ratio/high_max": 0.002026535803452134,
      "clip_ratio/high_mean": 0.0008791490945441183,
      "clip_ratio/low_mean": 0.0005753004415964824,
      "clip_ratio/low_min": 1.3280918210512027e-05,
      "clip_ratio/region_mean": 0.0014544495243171696,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4077.0,
      "completions/mean_length": 592.9308471679688,
      "completions/mean_terminated_length": 533.2871704101562,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 0.7370262390670554,
      "grad_norm": 0.13578945398330688,
      "learning_rate": 1e-06,
      "loss": -0.0069,
      "num_tokens": 45920554.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.21278755366802216,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263837933540344,
      "step": 79
    },
    {
      "clip_ratio/high_max": 0.001978152969968505,
      "clip_ratio/high_mean": 0.0009649189232732169,
      "clip_ratio/low_mean": 0.0005753234981966671,
      "clip_ratio/low_min": 2.4118670808093157e-05,
      "clip_ratio/region_mean": 0.0015402423960040323,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1958.0,
      "completions/mean_length": 571.3772583007812,
      "completions/mean_terminated_length": 519.48583984375,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 0.7463556851311953,
      "grad_norm": 0.14063102006912231,
      "learning_rate": 1e-06,
      "loss": 0.0075,
      "num_tokens": 46469308.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.22371943295001984,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 80
    },
    {
      "clip_ratio/high_max": 0.001734588273393456,
      "clip_ratio/high_mean": 0.0006513315038318979,
      "clip_ratio/low_mean": 0.000669722448947141,
      "clip_ratio/low_min": 2.6404732125229202e-05,
      "clip_ratio/region_mean": 0.0013210539618739858,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3800.0,
      "completions/mean_length": 658.5089721679688,
      "completions/mean_terminated_length": 592.0272827148438,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 0.7556851311953353,
      "grad_norm": 0.12003110349178314,
      "learning_rate": 1e-06,
      "loss": 0.006,
      "num_tokens": 47085516.0,
      "reward": 0.546875,
      "reward_std": 0.20770801603794098,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 81
    },
    {
      "clip_ratio/high_max": 0.0021562257752520964,
      "clip_ratio/high_mean": 0.000907462617760757,
      "clip_ratio/low_mean": 0.0005941133849773905,
      "clip_ratio/low_min": 3.771137744479347e-05,
      "clip_ratio/region_mean": 0.0015015759781817906,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3714.0,
      "completions/mean_length": 661.5949096679688,
      "completions/mean_terminated_length": 583.1837768554688,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 0.7650145772594752,
      "grad_norm": 0.11883300542831421,
      "learning_rate": 1e-06,
      "loss": -0.0027,
      "num_tokens": 47690553.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.1907336413860321,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 82
    },
    {
      "clip_ratio/high_max": 0.0022070456616347656,
      "clip_ratio/high_mean": 0.0007432409220200498,
      "clip_ratio/low_mean": 0.0005474028639582684,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012906437877973076,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2399.0,
      "completions/mean_length": 606.53125,
      "completions/mean_terminated_length": 551.1428833007812,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 0.7743440233236152,
      "grad_norm": 0.1223452165722847,
      "learning_rate": 1e-06,
      "loss": 0.002,
      "num_tokens": 48261877.0,
      "reward": 0.640625,
      "reward_std": 0.19531255960464478,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 83
    },
    {
      "clip_ratio/high_max": 0.0021039797793491744,
      "clip_ratio/high_mean": 0.0008076017456914997,
      "clip_ratio/low_mean": 0.0006205403151398059,
      "clip_ratio/low_min": 4.76356472063344e-05,
      "clip_ratio/region_mean": 0.0014281420735642314,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2685.0,
      "completions/mean_length": 655.1707763671875,
      "completions/mean_terminated_length": 560.468994140625,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 0.7836734693877551,
      "grad_norm": 0.13013124465942383,
      "learning_rate": 1e-06,
      "loss": -0.0138,
      "num_tokens": 48846222.0,
      "reward": 0.4933035969734192,
      "reward_std": 0.21158543229103088,
      "rewards/verify_math_reward/mean": 0.4933035671710968,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 84
    },
    {
      "clip_ratio/high_max": 0.0019734797679120675,
      "clip_ratio/high_mean": 0.0008554749056202127,
      "clip_ratio/low_mean": 0.0004858352594965254,
      "clip_ratio/low_min": 2.2116064428701065e-05,
      "clip_ratio/region_mean": 0.0013413101914920844,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3247.0,
      "completions/mean_length": 645.2779541015625,
      "completions/mean_terminated_length": 550.3038940429688,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 0.793002915451895,
      "grad_norm": 0.12221498042345047,
      "learning_rate": 1e-06,
      "loss": -0.0039,
      "num_tokens": 49415815.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.1856580227613449,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 85
    },
    {
      "clip_ratio/high_max": 0.0016927571450651158,
      "clip_ratio/high_mean": 0.0006764325917174574,
      "clip_ratio/low_mean": 0.00042965691227436764,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011060894648835529,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2791.0,
      "completions/mean_length": 596.411865234375,
      "completions/mean_terminated_length": 568.8560180664062,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 0.8023323615160349,
      "grad_norm": 0.10956127196550369,
      "learning_rate": 1e-06,
      "loss": 0.0096,
      "num_tokens": 50008392.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.16863587498664856,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 86
    },
    {
      "clip_ratio/high_max": 0.002186433153838152,
      "clip_ratio/high_mean": 0.0009657641330704791,
      "clip_ratio/low_mean": 0.0005564167249758611,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0015221808716887608,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3365.0,
      "completions/mean_length": 554.3392944335938,
      "completions/mean_terminated_length": 530.4629516601562,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 0.8116618075801749,
      "grad_norm": 0.13649345934391022,
      "learning_rate": 1e-06,
      "loss": 0.0163,
      "num_tokens": 50561168.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.19629782438278198,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.4846802353858948,
      "step": 87
    },
    {
      "clip_ratio/high_max": 0.00287763597589219,
      "clip_ratio/high_mean": 0.0009845189670159016,
      "clip_ratio/low_mean": 0.0007092171417752979,
      "clip_ratio/low_min": 5.837240314576775e-05,
      "clip_ratio/region_mean": 0.0016937361142481677,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4016.0,
      "completions/mean_length": 591.216552734375,
      "completions/mean_terminated_length": 519.364501953125,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 0.8209912536443149,
      "grad_norm": 0.1409979611635208,
      "learning_rate": 1e-06,
      "loss": -0.0017,
      "num_tokens": 51097242.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.18588557839393616,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159480571747,
      "step": 88
    },
    {
      "clip_ratio/high_max": 0.002211398081271909,
      "clip_ratio/high_mean": 0.0008753508591325954,
      "clip_ratio/low_mean": 0.0005992916412651539,
      "clip_ratio/low_min": 2.312245669600088e-05,
      "clip_ratio/region_mean": 0.0014746425076737069,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2010.0,
      "completions/mean_length": 615.833740234375,
      "completions/mean_terminated_length": 560.5929565429688,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 0.8303206997084548,
      "grad_norm": 0.13437823951244354,
      "learning_rate": 1e-06,
      "loss": -0.001,
      "num_tokens": 51693533.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.1976105272769928,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.4999600946903229,
      "step": 89
    },
    {
      "clip_ratio/high_max": 0.0019194139240426011,
      "clip_ratio/high_mean": 0.0007599462496727938,
      "clip_ratio/low_mean": 0.0006348003917082679,
      "clip_ratio/low_min": 2.6421475922688842e-05,
      "clip_ratio/region_mean": 0.001394746661389945,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3274.0,
      "completions/mean_length": 638.2377319335938,
      "completions/mean_terminated_length": 567.3496704101562,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 0.8396501457725948,
      "grad_norm": 0.13324740529060364,
      "learning_rate": 1e-06,
      "loss": 0.0081,
      "num_tokens": 52279218.0,
      "reward": 0.551339328289032,
      "reward_std": 0.20012825727462769,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 90
    },
    {
      "clip_ratio/high_max": 0.0021048882190370932,
      "clip_ratio/high_mean": 0.000845737680720049,
      "clip_ratio/low_mean": 0.0006471639298979426,
      "clip_ratio/low_min": 2.8259139980946202e-05,
      "clip_ratio/region_mean": 0.0014929016251699068,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3704.0,
      "completions/mean_length": 615.5725708007812,
      "completions/mean_terminated_length": 548.260498046875,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 0.8489795918367347,
      "grad_norm": 0.14451470971107483,
      "learning_rate": 1e-06,
      "loss": 0.0146,
      "num_tokens": 52849099.0,
      "reward": 0.59375,
      "reward_std": 0.19444282352924347,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 91
    },
    {
      "clip_ratio/high_max": 0.001944730418472318,
      "clip_ratio/high_mean": 0.000814591403468512,
      "clip_ratio/low_mean": 0.00047515574715362163,
      "clip_ratio/low_min": 1.359582347504329e-05,
      "clip_ratio/region_mean": 0.0012897471751784906,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3802.0,
      "completions/mean_length": 650.4029541015625,
      "completions/mean_terminated_length": 571.7362670898438,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 0.8583090379008746,
      "grad_norm": 0.12525710463523865,
      "learning_rate": 1e-06,
      "loss": -0.0175,
      "num_tokens": 53437020.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.18904298543930054,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.48468026518821716,
      "step": 92
    },
    {
      "clip_ratio/high_max": 0.002262782698380761,
      "clip_ratio/high_mean": 0.001004584528345731,
      "clip_ratio/low_mean": 0.0007672012488910696,
      "clip_ratio/low_min": 5.998021788400365e-05,
      "clip_ratio/region_mean": 0.0017717857917887159,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3590.0,
      "completions/mean_length": 597.5491333007812,
      "completions/mean_terminated_length": 546.0430297851562,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.8676384839650145,
      "grad_norm": 0.1381644457578659,
      "learning_rate": 1e-06,
      "loss": 0.0052,
      "num_tokens": 54016712.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.25426867604255676,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321892857551575,
      "step": 93
    },
    {
      "clip_ratio/high_max": 0.0018785912179737352,
      "clip_ratio/high_mean": 0.0008331616409122944,
      "clip_ratio/low_mean": 0.0005843826238560723,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001417544270225335,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2449.0,
      "completions/mean_length": 607.6015625,
      "completions/mean_terminated_length": 556.2434692382812,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 0.8769679300291545,
      "grad_norm": 0.12567032873630524,
      "learning_rate": 1e-06,
      "loss": 0.0006,
      "num_tokens": 54584627.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.1946403682231903,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924395978450775,
      "step": 94
    },
    {
      "clip_ratio/high_max": 0.0017815181017795112,
      "clip_ratio/high_mean": 0.0007609570038766833,
      "clip_ratio/low_mean": 0.0007353126402449561,
      "clip_ratio/low_min": 5.828756729897577e-05,
      "clip_ratio/region_mean": 0.0014962696586735547,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3783.0,
      "completions/mean_length": 690.1484985351562,
      "completions/mean_terminated_length": 592.3914794921875,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 0.8862973760932945,
      "grad_norm": 0.1269090622663498,
      "learning_rate": 1e-06,
      "loss": -0.0018,
      "num_tokens": 55188096.0,
      "reward": 0.527901828289032,
      "reward_std": 0.22631528973579407,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 95
    },
    {
      "clip_ratio/high_max": 0.0023371442439383827,
      "clip_ratio/high_mean": 0.0010629585303831846,
      "clip_ratio/low_mean": 0.0005052440212693909,
      "clip_ratio/low_min": 3.7416256418509874e-05,
      "clip_ratio/region_mean": 0.0015682025332353078,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3382.0,
      "completions/mean_length": 640.2109375,
      "completions/mean_terminated_length": 573.3754272460938,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 0.8956268221574344,
      "grad_norm": 0.12552319467067719,
      "learning_rate": 1e-06,
      "loss": 0.0034,
      "num_tokens": 55775549.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.21726585924625397,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 96
    },
    {
      "clip_ratio/high_max": 0.0019180375420546625,
      "clip_ratio/high_mean": 0.0007718974793533562,
      "clip_ratio/low_mean": 0.0006347041562548839,
      "clip_ratio/low_min": 2.688036965992069e-05,
      "clip_ratio/region_mean": 0.0014066016374272294,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2894.0,
      "completions/mean_length": 617.6864013671875,
      "completions/mean_terminated_length": 566.4767456054688,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 0.9049562682215744,
      "grad_norm": 0.1271030753850937,
      "learning_rate": 1e-06,
      "loss": 0.0163,
      "num_tokens": 56367044.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.19974736869335175,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689778685569763,
      "step": 97
    },
    {
      "clip_ratio/high_max": 0.0018232932598039042,
      "clip_ratio/high_mean": 0.0006611393855564529,
      "clip_ratio/low_mean": 0.0004289146963856183,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010900540910370182,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2921.0,
      "completions/mean_length": 710.0848388671875,
      "completions/mean_terminated_length": 620.8797607421875,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 0.9142857142857143,
      "grad_norm": 0.10877560079097748,
      "learning_rate": 1e-06,
      "loss": -0.0082,
      "num_tokens": 56995000.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.18629701435565948,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 98
    },
    {
      "clip_ratio/high_max": 0.0019879872706951573,
      "clip_ratio/high_mean": 0.0008827278543321881,
      "clip_ratio/low_mean": 0.0005190918327571126,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014018196816323325,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2959.0,
      "completions/mean_length": 676.8449096679688,
      "completions/mean_terminated_length": 574.6632080078125,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 0.9236151603498542,
      "grad_norm": 0.12853676080703735,
      "learning_rate": 1e-06,
      "loss": 0.0135,
      "num_tokens": 57594173.0,
      "reward": 0.494419664144516,
      "reward_std": 0.20726223289966583,
      "rewards/verify_math_reward/mean": 0.4944196343421936,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 99
    },
    {
      "clip_ratio/high_max": 0.0018739871666184627,
      "clip_ratio/high_mean": 0.0007508181433877326,
      "clip_ratio/low_mean": 0.0004454578192962799,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011962759890593588,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3519.0,
      "completions/mean_length": 662.8683471679688,
      "completions/mean_terminated_length": 592.4852294921875,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 0.9329446064139941,
      "grad_norm": 0.11898582428693771,
      "learning_rate": 1e-06,
      "loss": -0.003,
      "num_tokens": 58204639.0,
      "reward": 0.578125,
      "reward_std": 0.17757542431354523,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 100
    },
    {
      "clip_ratio/high_max": 0.0021525089468923397,
      "clip_ratio/high_mean": 0.0008797519258223474,
      "clip_ratio/low_mean": 0.0006131259842732106,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014928779273759574,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3869.0,
      "completions/mean_length": 637.5346069335938,
      "completions/mean_terminated_length": 574.6533813476562,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 0.9422740524781341,
      "grad_norm": 0.13018183410167694,
      "learning_rate": 1e-06,
      "loss": 0.0068,
      "num_tokens": 58795854.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.20992687344551086,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 101
    },
    {
      "clip_ratio/high_max": 0.0020191000949125737,
      "clip_ratio/high_mean": 0.0007565638316009426,
      "clip_ratio/low_mean": 0.0006582891796824697,
      "clip_ratio/low_min": 1.2278978829272091e-05,
      "clip_ratio/region_mean": 0.0014148530244710855,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2296.0,
      "completions/mean_length": 621.388427734375,
      "completions/mean_terminated_length": 566.23583984375,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 0.9516034985422741,
      "grad_norm": 0.12451858818531036,
      "learning_rate": 1e-06,
      "loss": 0.001,
      "num_tokens": 59395186.0,
      "reward": 0.559151828289032,
      "reward_std": 0.19505223631858826,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 102
    },
    {
      "clip_ratio/high_max": 0.0018583473174658138,
      "clip_ratio/high_mean": 0.000765695990594395,
      "clip_ratio/low_mean": 0.0005216967219894286,
      "clip_ratio/low_min": 3.714982904057251e-05,
      "clip_ratio/region_mean": 0.0012873927153123077,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3946.0,
      "completions/mean_length": 711.3203735351562,
      "completions/mean_terminated_length": 641.9305419921875,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 0.960932944606414,
      "grad_norm": 0.11913493275642395,
      "learning_rate": 1e-06,
      "loss": 0.0085,
      "num_tokens": 60046265.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.18960891664028168,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756613254547,
      "step": 103
    },
    {
      "clip_ratio/high_max": 0.0019098481170658488,
      "clip_ratio/high_mean": 0.0007057281472953036,
      "clip_ratio/low_mean": 0.0007603598660352873,
      "clip_ratio/low_min": 6.83317712173448e-05,
      "clip_ratio/region_mean": 0.0014660880333394744,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2183.0,
      "completions/mean_length": 643.609375,
      "completions/mean_terminated_length": 552.6529541015625,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 0.970262390670554,
      "grad_norm": 0.13780678808689117,
      "learning_rate": 1e-06,
      "loss": 0.0078,
      "num_tokens": 60621675.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.20978209376335144,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 104
    },
    {
      "clip_ratio/high_max": 0.0018821418343577534,
      "clip_ratio/high_mean": 0.0007210173389466945,
      "clip_ratio/low_mean": 0.0006037004368408816,
      "clip_ratio/low_min": 1.3891975868318696e-05,
      "clip_ratio/region_mean": 0.0013247177630546503,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3680.0,
      "completions/mean_length": 582.6183471679688,
      "completions/mean_terminated_length": 526.850341796875,
      "completions/min_length": 43.0,
      "completions/min_terminated_length": 43.0,
      "epoch": 0.9795918367346939,
      "grad_norm": 0.13550148904323578,
      "learning_rate": 1e-06,
      "loss": -0.0088,
      "num_tokens": 61174253.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.19343574345111847,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 105
    },
    {
      "clip_ratio/high_max": 0.0018749735718301963,
      "clip_ratio/high_mean": 0.0007686431654292392,
      "clip_ratio/low_mean": 0.0005044213003202458,
      "clip_ratio/low_min": 1.252254060091218e-05,
      "clip_ratio/region_mean": 0.0012730644848488737,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2133.0,
      "completions/mean_length": 607.5402221679688,
      "completions/mean_terminated_length": 556.1812133789062,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 0.9889212827988338,
      "grad_norm": 0.11981673538684845,
      "learning_rate": 1e-06,
      "loss": 0.0074,
      "num_tokens": 61747553.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.18002675473690033,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 106
    },
    {
      "clip_ratio/high_max": 0.002335174933250528,
      "clip_ratio/high_mean": 0.0008437378983217059,
      "clip_ratio/low_mean": 0.0006078883779991884,
      "clip_ratio/low_min": 1.4035481399332639e-05,
      "clip_ratio/region_mean": 0.0014516263036057353,
      "completions/clipped_ratio": 0.025568181818181768,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2461.0,
      "completions/mean_length": 594.6704711914062,
      "completions/mean_terminated_length": 502.7988586425781,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 0.9982507288629737,
      "grad_norm": 0.13220836222171783,
      "learning_rate": 1e-06,
      "loss": 0.0099,
      "num_tokens": 62317871.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.19820643961429596,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973994791507721,
      "step": 107
    },
    {
      "clip_ratio/high_max": 0.0022387665885617025,
      "clip_ratio/high_mean": 0.000920310674700886,
      "clip_ratio/low_mean": 0.0006362590484059183,
      "clip_ratio/low_min": 4.261960566509515e-05,
      "clip_ratio/region_mean": 0.001556569717649836,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3271.0,
      "completions/mean_length": 645.7924194335938,
      "completions/mean_terminated_length": 558.945068359375,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 1.00932944606414,
      "grad_norm": 0.1348712146282196,
      "learning_rate": 1e-06,
      "loss": -0.0101,
      "num_tokens": 62896813.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.22687985002994537,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514806270599365,
      "step": 108
    },
    {
      "clip_ratio/high_max": 0.0021707252963096835,
      "clip_ratio/high_mean": 0.0009288831206504256,
      "clip_ratio/low_mean": 0.000522829201145214,
      "clip_ratio/low_min": 2.4734090402489528e-05,
      "clip_ratio/region_mean": 0.0014517123272526078,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3794.0,
      "completions/mean_length": 619.0145263671875,
      "completions/mean_terminated_length": 551.76904296875,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 1.01865889212828,
      "grad_norm": 0.1271272599697113,
      "learning_rate": 1e-06,
      "loss": 0.0121,
      "num_tokens": 63478266.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.19823963940143585,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 109
    },
    {
      "clip_ratio/high_max": 0.0018047954326902982,
      "clip_ratio/high_mean": 0.0007129164569050772,
      "clip_ratio/low_mean": 0.00042467317234695656,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011375896174286027,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3977.0,
      "completions/mean_length": 702.435302734375,
      "completions/mean_terminated_length": 580.816162109375,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 1.0279883381924197,
      "grad_norm": 0.12314493954181671,
      "learning_rate": 1e-06,
      "loss": 0.0046,
      "num_tokens": 64073672.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.17389945685863495,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 110
    },
    {
      "clip_ratio/high_max": 0.0017495191896159668,
      "clip_ratio/high_mean": 0.0006691754142593709,
      "clip_ratio/low_mean": 0.0004947921643179143,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001163967597676674,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2519.0,
      "completions/mean_length": 647.2176513671875,
      "completions/mean_terminated_length": 556.3562622070312,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 1.0373177842565597,
      "grad_norm": 0.11964970827102661,
      "learning_rate": 1e-06,
      "loss": -0.0029,
      "num_tokens": 64663747.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.16743306815624237,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 111
    },
    {
      "clip_ratio/high_max": 0.001938575558597222,
      "clip_ratio/high_mean": 0.0008848465495248092,
      "clip_ratio/low_mean": 0.0007573944767500507,
      "clip_ratio/low_min": 5.194687491894001e-05,
      "clip_ratio/region_mean": 0.0016422410117229447,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3994.0,
      "completions/mean_length": 610.716552734375,
      "completions/mean_terminated_length": 543.310546875,
      "completions/min_length": 91.0,
      "completions/min_terminated_length": 91.0,
      "epoch": 1.0466472303206997,
      "grad_norm": 0.13910211622714996,
      "learning_rate": 1e-06,
      "loss": 0.0075,
      "num_tokens": 65235893.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.21459950506687164,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 112
    },
    {
      "clip_ratio/high_max": 0.0021278136191540398,
      "clip_ratio/high_mean": 0.0008376358291570796,
      "clip_ratio/low_mean": 0.0006558591594512109,
      "clip_ratio/low_min": 4.088420246262103e-05,
      "clip_ratio/region_mean": 0.0014934949977032375,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2864.0,
      "completions/mean_length": 625.779052734375,
      "completions/mean_terminated_length": 554.6355590820312,
      "completions/min_length": 181.0,
      "completions/min_terminated_length": 181.0,
      "epoch": 1.0559766763848397,
      "grad_norm": 0.1188901960849762,
      "learning_rate": 1e-06,
      "loss": -0.0093,
      "num_tokens": 65813495.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.1983586996793747,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 113
    },
    {
      "clip_ratio/high_max": 0.0018923180177807808,
      "clip_ratio/high_mean": 0.0007647187485417817,
      "clip_ratio/low_mean": 0.0006024704180163098,
      "clip_ratio/low_min": 1.3501836292562075e-05,
      "clip_ratio/region_mean": 0.001367189186566975,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2139.0,
      "completions/mean_length": 617.6842041015625,
      "completions/mean_terminated_length": 550.4129638671875,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 1.0653061224489795,
      "grad_norm": 0.12145751714706421,
      "learning_rate": 1e-06,
      "loss": 0.0067,
      "num_tokens": 66408756.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.1786727011203766,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 114
    },
    {
      "clip_ratio/high_max": 0.0016835417263791896,
      "clip_ratio/high_mean": 0.0006536952914757421,
      "clip_ratio/low_mean": 0.0004384264557302231,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010921217544819228,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4068.0,
      "completions/mean_length": 629.193115234375,
      "completions/mean_terminated_length": 554.0855102539062,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 1.0746355685131195,
      "grad_norm": 0.12573300302028656,
      "learning_rate": 1e-06,
      "loss": 0.0053,
      "num_tokens": 66982601.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.17559054493904114,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689778685569763,
      "step": 115
    },
    {
      "clip_ratio/high_max": 0.0017454099142923951,
      "clip_ratio/high_mean": 0.0007543304909631843,
      "clip_ratio/low_mean": 0.0005247638746368466,
      "clip_ratio/low_min": 1.396336028847145e-05,
      "clip_ratio/region_mean": 0.0012790943510481156,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2859.0,
      "completions/mean_length": 612.419677734375,
      "completions/mean_terminated_length": 569.1209106445312,
      "completions/min_length": 79.0,
      "completions/min_terminated_length": 79.0,
      "epoch": 1.0839650145772595,
      "grad_norm": 0.12264445424079895,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 67574113.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.19238336384296417,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 116
    },
    {
      "clip_ratio/high_max": 0.001786453249223996,
      "clip_ratio/high_mean": 0.0007486339327442693,
      "clip_ratio/low_mean": 0.000482122797620832,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012307567230891436,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3431.0,
      "completions/mean_length": 630.3046875,
      "completions/mean_terminated_length": 551.17919921875,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 1.0932944606413995,
      "grad_norm": 0.12435305863618851,
      "learning_rate": 1e-06,
      "loss": 0.0009,
      "num_tokens": 68142066.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.2005397230386734,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161845445632935,
      "step": 117
    },
    {
      "clip_ratio/high_max": 0.0017564252302690875,
      "clip_ratio/high_mean": 0.0006838038789283019,
      "clip_ratio/low_mean": 0.00044850763742942945,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001132311484980164,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2756.0,
      "completions/mean_length": 617.4921875,
      "completions/mean_terminated_length": 554.24658203125,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 1.1026239067055394,
      "grad_norm": 0.1242511197924614,
      "learning_rate": 1e-06,
      "loss": 0.0001,
      "num_tokens": 68704155.0,
      "reward": 0.6328125,
      "reward_std": 0.16747723519802094,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 118
    },
    {
      "clip_ratio/high_max": 0.0017772580322343856,
      "clip_ratio/high_mean": 0.0007229723705677316,
      "clip_ratio/low_mean": 0.0005288728652885766,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012518452131189406,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3401.0,
      "completions/mean_length": 586.9732666015625,
      "completions/mean_terminated_length": 523.1727294921875,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 1.1119533527696792,
      "grad_norm": 0.1337558925151825,
      "learning_rate": 1e-06,
      "loss": 0.0021,
      "num_tokens": 69238171.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.1710776388645172,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 119
    },
    {
      "clip_ratio/high_max": 0.0015808707248652354,
      "clip_ratio/high_mean": 0.0006234206184672075,
      "clip_ratio/low_mean": 0.0005845670289090776,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012079876032657921,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3930.0,
      "completions/mean_length": 643.427490234375,
      "completions/mean_terminated_length": 580.6533813476562,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 1.1212827988338192,
      "grad_norm": 0.12093667685985565,
      "learning_rate": 1e-06,
      "loss": 0.0084,
      "num_tokens": 69845786.0,
      "reward": 0.5703125,
      "reward_std": 0.19471341371536255,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 120
    },
    {
      "clip_ratio/high_max": 0.0017549932235851884,
      "clip_ratio/high_mean": 0.0006848561370134121,
      "clip_ratio/low_mean": 0.0006726086385242525,
      "clip_ratio/low_min": 1.0254307198920287e-05,
      "clip_ratio/region_mean": 0.001357464770990191,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3980.0,
      "completions/mean_length": 686.0089721679688,
      "completions/mean_terminated_length": 584.1011352539062,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 1.1306122448979592,
      "grad_norm": 0.12279097735881805,
      "learning_rate": 1e-06,
      "loss": 0.006,
      "num_tokens": 70451154.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.17611117660999298,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 121
    },
    {
      "clip_ratio/high_max": 0.0018810498586390167,
      "clip_ratio/high_mean": 0.0008115886794257676,
      "clip_ratio/low_mean": 0.00046586242297053104,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012774511160387192,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2469.0,
      "completions/mean_length": 605.732177734375,
      "completions/mean_terminated_length": 546.3065185546875,
      "completions/min_length": 90.0,
      "completions/min_terminated_length": 90.0,
      "epoch": 1.1399416909620992,
      "grad_norm": 0.12779517471790314,
      "learning_rate": 1e-06,
      "loss": 0.0073,
      "num_tokens": 71011018.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.1821650117635727,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 122
    },
    {
      "clip_ratio/high_max": 0.0017122595381806605,
      "clip_ratio/high_mean": 0.0006494648532680003,
      "clip_ratio/low_mean": 0.0006846807918918785,
      "clip_ratio/low_min": 2.707682506297715e-05,
      "clip_ratio/region_mean": 0.0013341456651687622,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3643.0,
      "completions/mean_length": 658.9241333007812,
      "completions/mean_terminated_length": 588.4601440429688,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 1.149271137026239,
      "grad_norm": 0.13286446034908295,
      "learning_rate": 1e-06,
      "loss": 0.0029,
      "num_tokens": 71617774.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.21203728020191193,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 123
    },
    {
      "clip_ratio/high_max": 0.001713228179141879,
      "clip_ratio/high_mean": 0.0007118931907825754,
      "clip_ratio/low_mean": 0.0006066005862521706,
      "clip_ratio/low_min": 2.9062808607704937e-05,
      "clip_ratio/region_mean": 0.001318493752478389,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2371.0,
      "completions/mean_length": 598.6339721679688,
      "completions/mean_terminated_length": 547.143798828125,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 1.158600583090379,
      "grad_norm": 0.14108534157276154,
      "learning_rate": 1e-06,
      "loss": 0.0061,
      "num_tokens": 72190414.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.19196967780590057,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 124
    },
    {
      "clip_ratio/high_max": 0.0012990183022338897,
      "clip_ratio/high_mean": 0.00048056226751214126,
      "clip_ratio/low_mean": 0.0003377165571691876,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008182788296835497,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3494.0,
      "completions/mean_length": 720.2489013671875,
      "completions/mean_terminated_length": 587.0985717773438,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 1.167930029154519,
      "grad_norm": 0.10980133712291718,
      "learning_rate": 1e-06,
      "loss": -0.0089,
      "num_tokens": 72786837.0,
      "reward": 0.5033482313156128,
      "reward_std": 0.13801473379135132,
      "rewards/verify_math_reward/mean": 0.5033482313156128,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 125
    },
    {
      "clip_ratio/high_max": 0.0019630566603154875,
      "clip_ratio/high_mean": 0.0007658281065232586,
      "clip_ratio/low_mean": 0.0004464349394766032,
      "clip_ratio/low_min": 1.295605306950165e-05,
      "clip_ratio/region_mean": 0.0012122630214435048,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2776.0,
      "completions/mean_length": 604.15625,
      "completions/mean_terminated_length": 544.7037963867188,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 1.177259475218659,
      "grad_norm": 0.12922635674476624,
      "learning_rate": 1e-06,
      "loss": 0.002,
      "num_tokens": 73355201.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.17697879672050476,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 126
    },
    {
      "clip_ratio/high_max": 0.0018201843595306855,
      "clip_ratio/high_mean": 0.0006656213408859912,
      "clip_ratio/low_mean": 0.000634996009466704,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013006173285248224,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2575.0,
      "completions/mean_length": 573.984375,
      "completions/mean_terminated_length": 546.2520141601562,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 1.186588921282799,
      "grad_norm": 0.12683415412902832,
      "learning_rate": 1e-06,
      "loss": 0.0104,
      "num_tokens": 73923419.0,
      "reward": 0.629464328289032,
      "reward_std": 0.1753644049167633,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 127
    },
    {
      "clip_ratio/high_max": 0.0018087178468704224,
      "clip_ratio/high_mean": 0.0008138277462421684,
      "clip_ratio/low_mean": 0.0005633177388517652,
      "clip_ratio/low_min": 2.562715144449612e-05,
      "clip_ratio/region_mean": 0.0013771454723610077,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3380.0,
      "completions/mean_length": 636.6652221679688,
      "completions/mean_terminated_length": 561.719482421875,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 1.1959183673469387,
      "grad_norm": 0.13377544283866882,
      "learning_rate": 1e-06,
      "loss": 0.0035,
      "num_tokens": 74507967.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.2176763117313385,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 128
    },
    {
      "clip_ratio/high_max": 0.002272482895932626,
      "clip_ratio/high_mean": 0.0008079380513663637,
      "clip_ratio/low_mean": 0.0007487964085157728,
      "clip_ratio/low_min": 3.629790353443241e-05,
      "clip_ratio/region_mean": 0.0015567344780720305,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2323.0,
      "completions/mean_length": 637.7879638671875,
      "completions/mean_terminated_length": 558.8333129882812,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 1.2052478134110787,
      "grad_norm": 0.14027684926986694,
      "learning_rate": 1e-06,
      "loss": 0.0009,
      "num_tokens": 75090593.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.22007599472999573,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 129
    },
    {
      "clip_ratio/high_max": 0.0017475325330451597,
      "clip_ratio/high_mean": 0.0006926625828782562,
      "clip_ratio/low_mean": 0.000576557072236028,
      "clip_ratio/low_min": 4.1899011193891056e-05,
      "clip_ratio/region_mean": 0.0012692196542047895,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4092.0,
      "completions/mean_length": 730.8136596679688,
      "completions/mean_terminated_length": 594.0173950195312,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 1.2145772594752187,
      "grad_norm": 0.13198024034500122,
      "learning_rate": 1e-06,
      "loss": -0.0003,
      "num_tokens": 75695186.0,
      "reward": 0.5033482313156128,
      "reward_std": 0.20410902798175812,
      "rewards/verify_math_reward/mean": 0.5033482313156128,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 130
    },
    {
      "clip_ratio/high_max": 0.0018253561356686987,
      "clip_ratio/high_mean": 0.0008197991337510757,
      "clip_ratio/low_mean": 0.0004882724788330961,
      "clip_ratio/low_min": 3.2800064218463376e-05,
      "clip_ratio/region_mean": 0.0013080716053082142,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3964.0,
      "completions/mean_length": 585.388427734375,
      "completions/mean_terminated_length": 529.6644287109375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 1.2239067055393587,
      "grad_norm": 0.14042188227176666,
      "learning_rate": 1e-06,
      "loss": 0.0181,
      "num_tokens": 76247502.0,
      "reward": 0.625,
      "reward_std": 0.1896064132452011,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 131
    },
    {
      "clip_ratio/high_max": 0.001913049283757573,
      "clip_ratio/high_mean": 0.0006868303107694373,
      "clip_ratio/low_mean": 0.00042972276014552335,
      "clip_ratio/low_min": 1.3957123883301392e-05,
      "clip_ratio/region_mean": 0.0011165530886501074,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3268.0,
      "completions/mean_length": 582.15625,
      "completions/mean_terminated_length": 538.4813842773438,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 1.2332361516034984,
      "grad_norm": 0.11454752832651138,
      "learning_rate": 1e-06,
      "loss": 0.0134,
      "num_tokens": 76809554.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.16048020124435425,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 132
    },
    {
      "clip_ratio/high_max": 0.001901395145978313,
      "clip_ratio/high_mean": 0.0007389826678263489,
      "clip_ratio/low_mean": 0.0005679324931406882,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013069151646050159,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3139.0,
      "completions/mean_length": 590.3046875,
      "completions/mean_terminated_length": 550.737060546875,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 1.2425655976676384,
      "grad_norm": 0.13019219040870667,
      "learning_rate": 1e-06,
      "loss": 0.0039,
      "num_tokens": 77388379.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.19268646836280823,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 133
    },
    {
      "clip_ratio/high_max": 0.0019378843207960017,
      "clip_ratio/high_mean": 0.0007921883643575711,
      "clip_ratio/low_mean": 0.0006041999640729045,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013963882774987724,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3501.0,
      "completions/mean_length": 593.3381958007812,
      "completions/mean_terminated_length": 545.790771484375,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 1.2518950437317784,
      "grad_norm": 0.12969744205474854,
      "learning_rate": 1e-06,
      "loss": -0.0026,
      "num_tokens": 77952122.0,
      "reward": 0.621651828289032,
      "reward_std": 0.1883269101381302,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 134
    },
    {
      "clip_ratio/high_max": 0.0021529032892431132,
      "clip_ratio/high_mean": 0.0009231247586285463,
      "clip_ratio/low_mean": 0.0006282718204602133,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001551396617287537,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3528.0,
      "completions/mean_length": 614.7689819335938,
      "completions/mean_terminated_length": 527.1406860351562,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 1.2612244897959184,
      "grad_norm": 0.3380157947540283,
      "learning_rate": 1e-06,
      "loss": -0.0045,
      "num_tokens": 78496859.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.21185435354709625,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 135
    },
    {
      "clip_ratio/high_max": 0.0017971838769881288,
      "clip_ratio/high_mean": 0.0006118418068581377,
      "clip_ratio/low_mean": 0.0006130956890046946,
      "clip_ratio/low_min": 4.8540068746660836e-05,
      "clip_ratio/region_mean": 0.0012249375104147475,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2587.0,
      "completions/mean_length": 607.747802734375,
      "completions/mean_terminated_length": 524.0297241210938,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 1.2705539358600584,
      "grad_norm": 0.12717096507549286,
      "learning_rate": 1e-06,
      "loss": -0.0191,
      "num_tokens": 79046985.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.16638249158859253,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 136
    },
    {
      "clip_ratio/high_max": 0.002020962674578186,
      "clip_ratio/high_mean": 0.0007820503360562725,
      "clip_ratio/low_mean": 0.0005464571549964603,
      "clip_ratio/low_min": 1.3400514944805764e-05,
      "clip_ratio/region_mean": 0.0013285074528539553,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3811.0,
      "completions/mean_length": 644.1551513671875,
      "completions/mean_terminated_length": 569.3717041015625,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 1.2798833819241984,
      "grad_norm": 0.12049160152673721,
      "learning_rate": 1e-06,
      "loss": 0.0113,
      "num_tokens": 79632548.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.18550649285316467,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 137
    },
    {
      "clip_ratio/high_max": 0.0018255292561661918,
      "clip_ratio/high_mean": 0.0005775526515208185,
      "clip_ratio/low_mean": 0.0005093980871606618,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010869507277675439,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3068.0,
      "completions/mean_length": 636.234375,
      "completions/mean_terminated_length": 565.3052368164062,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 1.2892128279883381,
      "grad_norm": 0.12048103660345078,
      "learning_rate": 1e-06,
      "loss": -0.0038,
      "num_tokens": 80221198.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.16288693249225616,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689781665802,
      "step": 138
    },
    {
      "clip_ratio/high_max": 0.002007623614190379,
      "clip_ratio/high_mean": 0.0007619925354447332,
      "clip_ratio/low_mean": 0.0006293182996159885,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013913108559790999,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3092.0,
      "completions/mean_length": 677.138427734375,
      "completions/mean_terminated_length": 570.9136962890625,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 1.2985422740524781,
      "grad_norm": 0.12649589776992798,
      "learning_rate": 1e-06,
      "loss": -0.0119,
      "num_tokens": 80813106.0,
      "reward": 0.520089328289032,
      "reward_std": 0.19456186890602112,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 139
    },
    {
      "clip_ratio/high_max": 0.0020343236610642634,
      "clip_ratio/high_mean": 0.0007822143088560551,
      "clip_ratio/low_mean": 0.0005923915932726231,
      "clip_ratio/low_min": 1.6099947970360518e-05,
      "clip_ratio/region_mean": 0.0013746059048571624,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3701.0,
      "completions/mean_length": 698.8170166015625,
      "completions/mean_terminated_length": 589.2304077148438,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 1.3078717201166181,
      "grad_norm": 0.1343315690755844,
      "learning_rate": 1e-06,
      "loss": -0.0112,
      "num_tokens": 81417614.0,
      "reward": 0.520089328289032,
      "reward_std": 0.20493286848068237,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 140
    },
    {
      "clip_ratio/high_max": 0.0015934638831822667,
      "clip_ratio/high_mean": 0.0005796222703793319,
      "clip_ratio/low_mean": 0.0005065776958872448,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010861999762710184,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3603.0,
      "completions/mean_length": 671.3125,
      "completions/mean_terminated_length": 577.0549926757812,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 1.3172011661807579,
      "grad_norm": 0.11626088619232178,
      "learning_rate": 1e-06,
      "loss": 0.0077,
      "num_tokens": 82014734.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.16897399723529816,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 141
    },
    {
      "clip_ratio/high_max": 0.0015611184171575587,
      "clip_ratio/high_mean": 0.0006733137779519893,
      "clip_ratio/low_mean": 0.0006984407709751395,
      "clip_ratio/low_min": 5.065845380158862e-05,
      "clip_ratio/region_mean": 0.0013717545552935917,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2779.0,
      "completions/mean_length": 690.3348388671875,
      "completions/mean_terminated_length": 592.583251953125,
      "completions/min_length": 85.0,
      "completions/min_terminated_length": 85.0,
      "epoch": 1.3265306122448979,
      "grad_norm": 0.1328561007976532,
      "learning_rate": 1e-06,
      "loss": -0.0054,
      "num_tokens": 82624282.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.2069302648305893,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.49828118085861206,
      "step": 142
    },
    {
      "clip_ratio/high_max": 0.002104275714373216,
      "clip_ratio/high_mean": 0.0007949095033836784,
      "clip_ratio/low_mean": 0.0006041574306436814,
      "clip_ratio/low_min": 1.7433751054340973e-05,
      "clip_ratio/region_mean": 0.0013990669249324128,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3350.0,
      "completions/mean_length": 663.1194458007812,
      "completions/mean_terminated_length": 560.527587890625,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 1.3358600583090379,
      "grad_norm": 0.14331470429897308,
      "learning_rate": 1e-06,
      "loss": -0.0046,
      "num_tokens": 83203509.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.18919385969638824,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 143
    },
    {
      "clip_ratio/high_max": 0.0015646205501980148,
      "clip_ratio/high_mean": 0.0006571413559868233,
      "clip_ratio/low_mean": 0.000618994738033507,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001276136092201341,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3337.0,
      "completions/mean_length": 686.2879638671875,
      "completions/mean_terminated_length": 608.4406127929688,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 1.3451895043731779,
      "grad_norm": 0.12529589235782623,
      "learning_rate": 1e-06,
      "loss": 0.0111,
      "num_tokens": 83832719.0,
      "reward": 0.546875,
      "reward_std": 0.19839440286159515,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 144
    },
    {
      "clip_ratio/high_max": 0.0019495428350637667,
      "clip_ratio/high_mean": 0.000920495573154767,
      "clip_ratio/low_mean": 0.0005835863939864794,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0015040819671412464,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2413.0,
      "completions/mean_length": 619.0346069335938,
      "completions/mean_terminated_length": 523.3382568359375,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 1.3545189504373178,
      "grad_norm": 0.14971975982189178,
      "learning_rate": 1e-06,
      "loss": -0.0016,
      "num_tokens": 84375726.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.1888914555311203,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 145
    },
    {
      "clip_ratio/high_max": 0.0017173868873214815,
      "clip_ratio/high_mean": 0.0007454751685145311,
      "clip_ratio/low_mean": 0.0006423831055144547,
      "clip_ratio/low_min": 2.6167050236836076e-05,
      "clip_ratio/region_mean": 0.0013878582940378692,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3075.0,
      "completions/mean_length": 675.0267944335938,
      "completions/mean_terminated_length": 580.8715209960938,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 1.3638483965014578,
      "grad_norm": 0.13224931061267853,
      "learning_rate": 1e-06,
      "loss": 0.0072,
      "num_tokens": 84985630.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.1929485946893692,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 146
    },
    {
      "clip_ratio/high_max": 0.002161765700293472,
      "clip_ratio/high_mean": 0.0007993987874215236,
      "clip_ratio/low_mean": 0.00048263823282468366,
      "clip_ratio/low_min": 9.279880941903684e-06,
      "clip_ratio/region_mean": 0.0012820370502595324,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4068.0,
      "completions/mean_length": 669.5881958007812,
      "completions/mean_terminated_length": 587.3543090820312,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 1.3731778425655976,
      "grad_norm": 0.13312533497810364,
      "learning_rate": 1e-06,
      "loss": 0.0146,
      "num_tokens": 85587173.0,
      "reward": 0.5546875,
      "reward_std": 0.1856580376625061,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 147
    },
    {
      "clip_ratio/high_max": 0.0020616715628420934,
      "clip_ratio/high_mean": 0.0009268377507396508,
      "clip_ratio/low_mean": 0.0005328221686795587,
      "clip_ratio/low_min": 1.1036552677978761e-05,
      "clip_ratio/region_mean": 0.0014596599612559658,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3995.0,
      "completions/mean_length": 718.458740234375,
      "completions/mean_terminated_length": 593.3645629882812,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 1.3825072886297376,
      "grad_norm": 0.13047988712787628,
      "learning_rate": 1e-06,
      "loss": 0.0047,
      "num_tokens": 86183288.0,
      "reward": 0.574776828289032,
      "reward_std": 0.21199268102645874,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 148
    },
    {
      "clip_ratio/high_max": 0.0016880180082807783,
      "clip_ratio/high_mean": 0.0005727906318497844,
      "clip_ratio/low_mean": 0.000599437033997674,
      "clip_ratio/low_min": 1.3205155482864939e-05,
      "clip_ratio/region_mean": 0.0011722276831278577,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3820.0,
      "completions/mean_length": 607.1004638671875,
      "completions/mean_terminated_length": 543.6658935546875,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 1.3918367346938776,
      "grad_norm": 0.13434885442256927,
      "learning_rate": 1e-06,
      "loss": -0.0028,
      "num_tokens": 86746338.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.18118859827518463,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 149
    },
    {
      "clip_ratio/high_max": 0.0016494169176439755,
      "clip_ratio/high_mean": 0.0005675286411133129,
      "clip_ratio/low_mean": 0.0005270805522741284,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010946092043013778,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3588.0,
      "completions/mean_length": 652.505615234375,
      "completions/mean_terminated_length": 569.8616943359375,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 1.4011661807580174,
      "grad_norm": 0.11531613022089005,
      "learning_rate": 1e-06,
      "loss": 0.0125,
      "num_tokens": 87341223.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.15431898832321167,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 150
    },
    {
      "clip_ratio/high_max": 0.0018470843424438499,
      "clip_ratio/high_mean": 0.0006419708988687489,
      "clip_ratio/low_mean": 0.0006261602720769588,
      "clip_ratio/low_min": 5.929211329203099e-05,
      "clip_ratio/region_mean": 0.0012681312073254958,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3478.0,
      "completions/mean_length": 686.271240234375,
      "completions/mean_terminated_length": 576.2799682617188,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 1.4104956268221573,
      "grad_norm": 0.1235036551952362,
      "learning_rate": 1e-06,
      "loss": 0.0208,
      "num_tokens": 87938634.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.1676594614982605,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689781665802,
      "step": 151
    },
    {
      "clip_ratio/high_max": 0.0019267288444098085,
      "clip_ratio/high_mean": 0.0007970599690452218,
      "clip_ratio/low_mean": 0.0005695634208677802,
      "clip_ratio/low_min": 3.3647374948486686e-05,
      "clip_ratio/region_mean": 0.0013666233644471504,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3854.0,
      "completions/mean_length": 608.6015625,
      "completions/mean_terminated_length": 553.2460327148438,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 1.4198250728862973,
      "grad_norm": 0.13221563398838043,
      "learning_rate": 1e-06,
      "loss": 0.0088,
      "num_tokens": 88511077.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.19846788048744202,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975656390190125,
      "step": 152
    },
    {
      "clip_ratio/high_max": 0.001906688241433585,
      "clip_ratio/high_mean": 0.0006397882425517309,
      "clip_ratio/low_mean": 0.00047918893233145354,
      "clip_ratio/low_min": 1.8027112673735246e-05,
      "clip_ratio/region_mean": 0.0011189771830686368,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3324.0,
      "completions/mean_length": 645.466552734375,
      "completions/mean_terminated_length": 550.4976806640625,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 1.4291545189504373,
      "grad_norm": 0.1337098330259323,
      "learning_rate": 1e-06,
      "loss": 0.0115,
      "num_tokens": 89080335.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.18040582537651062,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200122833252,
      "step": 153
    },
    {
      "clip_ratio/high_max": 0.0015368815656984225,
      "clip_ratio/high_mean": 0.0005800444414489903,
      "clip_ratio/low_mean": 0.0005559555738727795,
      "clip_ratio/low_min": 1.67291218531318e-05,
      "clip_ratio/region_mean": 0.0011360000135027803,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3132.0,
      "completions/mean_length": 623.3158569335938,
      "completions/mean_terminated_length": 552.1218872070312,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 1.4384839650145773,
      "grad_norm": 0.12626437842845917,
      "learning_rate": 1e-06,
      "loss": 0.006,
      "num_tokens": 89647626.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.16555652022361755,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 154
    },
    {
      "clip_ratio/high_max": 0.001931624261487741,
      "clip_ratio/high_mean": 0.0007596589457534719,
      "clip_ratio/low_mean": 0.0003546154935065715,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011142744442622643,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2691.0,
      "completions/mean_length": 590.685302734375,
      "completions/mean_terminated_length": 522.8919067382812,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 1.4478134110787173,
      "grad_norm": 0.12856322526931763,
      "learning_rate": 1e-06,
      "loss": -0.003,
      "num_tokens": 90191576.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.17574280500411987,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667038440704346,
      "step": 155
    },
    {
      "clip_ratio/high_max": 0.00201725831720978,
      "clip_ratio/high_mean": 0.0007344376808759989,
      "clip_ratio/low_mean": 0.0006907381075507146,
      "clip_ratio/low_min": 1.090560090233339e-05,
      "clip_ratio/region_mean": 0.0014251758184400387,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3844.0,
      "completions/mean_length": 594.6752319335938,
      "completions/mean_terminated_length": 535.0613403320312,
      "completions/min_length": 198.0,
      "completions/min_terminated_length": 198.0,
      "epoch": 1.457142857142857,
      "grad_norm": 0.13053034245967865,
      "learning_rate": 1e-06,
      "loss": -0.0,
      "num_tokens": 90752269.0,
      "reward": 0.625,
      "reward_std": 0.18832510709762573,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 156
    },
    {
      "clip_ratio/high_max": 0.0017435079353163019,
      "clip_ratio/high_mean": 0.0007276815913428436,
      "clip_ratio/low_mean": 0.0005316176857377286,
      "clip_ratio/low_min": 1.160846932179993e-05,
      "clip_ratio/region_mean": 0.0012592993043654133,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3797.0,
      "completions/mean_length": 704.2031860351562,
      "completions/mean_terminated_length": 610.8508911132812,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 1.466472303206997,
      "grad_norm": 0.13034747540950775,
      "learning_rate": 1e-06,
      "loss": 0.0089,
      "num_tokens": 91376435.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.2033594399690628,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 157
    },
    {
      "clip_ratio/high_max": 0.0021951078888378106,
      "clip_ratio/high_mean": 0.0008293294667964801,
      "clip_ratio/low_mean": 0.0006260247982936562,
      "clip_ratio/low_min": 2.0167795810266398e-05,
      "clip_ratio/region_mean": 0.0014553542496287264,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4087.0,
      "completions/mean_length": 678.5111694335938,
      "completions/mean_terminated_length": 592.4873657226562,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 1.475801749271137,
      "grad_norm": 0.13196291029453278,
      "learning_rate": 1e-06,
      "loss": 0.0072,
      "num_tokens": 91985125.0,
      "reward": 0.5703125,
      "reward_std": 0.18870559334754944,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 158
    },
    {
      "clip_ratio/high_max": 0.001829417687986279,
      "clip_ratio/high_mean": 0.000784383266363875,
      "clip_ratio/low_mean": 0.0004983877370250411,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001282771016121842,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3707.0,
      "completions/mean_length": 682.1328125,
      "completions/mean_terminated_length": 600.2000122070312,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 1.485131195335277,
      "grad_norm": 0.1347026824951172,
      "learning_rate": 1e-06,
      "loss": -0.0104,
      "num_tokens": 92588844.0,
      "reward": 0.59375,
      "reward_std": 0.20260171592235565,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 159
    },
    {
      "clip_ratio/high_max": 0.0017668598411546554,
      "clip_ratio/high_mean": 0.000813364764326252,
      "clip_ratio/low_mean": 0.0005534497195185395,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013668144310940988,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2744.0,
      "completions/mean_length": 656.771240234375,
      "completions/mean_terminated_length": 570.2001953125,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 1.4944606413994168,
      "grad_norm": 0.13396763801574707,
      "learning_rate": 1e-06,
      "loss": -0.0139,
      "num_tokens": 93177303.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.20827394723892212,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 160
    },
    {
      "clip_ratio/high_max": 0.0019355226613697596,
      "clip_ratio/high_mean": 0.0007956197114253882,
      "clip_ratio/low_mean": 0.0005516102828551084,
      "clip_ratio/low_min": 5.0502947487984784e-05,
      "clip_ratio/region_mean": 0.001347230005194433,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2103.0,
      "completions/mean_length": 639.8471069335938,
      "completions/mean_terminated_length": 573.0045166015625,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 1.5037900874635568,
      "grad_norm": 0.1301068812608719,
      "learning_rate": 1e-06,
      "loss": 0.0099,
      "num_tokens": 93763366.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.1840411126613617,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865824937820435,
      "step": 161
    },
    {
      "clip_ratio/high_max": 0.001923288309626514,
      "clip_ratio/high_mean": 0.000747525396036508,
      "clip_ratio/low_mean": 0.0004745077794723329,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012220331627759151,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3448.0,
      "completions/mean_length": 704.0569458007812,
      "completions/mean_terminated_length": 602.6884765625,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 1.5131195335276968,
      "grad_norm": 0.1191791296005249,
      "learning_rate": 1e-06,
      "loss": 0.006,
      "num_tokens": 94383145.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.197902649641037,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 162
    },
    {
      "clip_ratio/high_max": 0.0023761102493153885,
      "clip_ratio/high_mean": 0.00098472046374809,
      "clip_ratio/low_mean": 0.00046745350573473843,
      "clip_ratio/low_min": 1.3658216630574316e-05,
      "clip_ratio/region_mean": 0.0014521739576593973,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1893.0,
      "completions/mean_length": 610.7467041015625,
      "completions/mean_terminated_length": 555.4251708984375,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 1.5224489795918368,
      "grad_norm": 0.15298490226268768,
      "learning_rate": 1e-06,
      "loss": -0.0002,
      "num_tokens": 94974646.0,
      "reward": 0.543526828289032,
      "reward_std": 0.22872062027454376,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 163
    },
    {
      "clip_ratio/high_max": 0.0015781065812916495,
      "clip_ratio/high_mean": 0.0004595067603077041,
      "clip_ratio/low_mean": 0.000494794807764265,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009543015621602535,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3022.0,
      "completions/mean_length": 620.7455444335938,
      "completions/mean_terminated_length": 525.0963134765625,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 1.5317784256559768,
      "grad_norm": 0.10810443013906479,
      "learning_rate": 1e-06,
      "loss": -0.003,
      "num_tokens": 95526018.0,
      "reward": 0.59375,
      "reward_std": 0.12805740535259247,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 164
    },
    {
      "clip_ratio/high_max": 0.0017954709655896295,
      "clip_ratio/high_mean": 0.0007386102424788987,
      "clip_ratio/low_mean": 0.0005059665991211659,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012445768479665276,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3908.0,
      "completions/mean_length": 688.3214721679688,
      "completions/mean_terminated_length": 602.5446166992188,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 1.5411078717201168,
      "grad_norm": 0.1268397867679596,
      "learning_rate": 1e-06,
      "loss": -0.0215,
      "num_tokens": 96144530.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.18336710333824158,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 165
    },
    {
      "clip_ratio/high_max": 0.0020396360268932767,
      "clip_ratio/high_mean": 0.0006896339455124689,
      "clip_ratio/low_mean": 0.0006078881506255129,
      "clip_ratio/low_min": 1.302083364862483e-05,
      "clip_ratio/region_mean": 0.0012975220706721302,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2533.0,
      "completions/mean_length": 587.96875,
      "completions/mean_terminated_length": 516.0501098632812,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 1.5504373177842565,
      "grad_norm": 0.13565339148044586,
      "learning_rate": 1e-06,
      "loss": -0.0079,
      "num_tokens": 96691182.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.16999132931232452,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 166
    },
    {
      "clip_ratio/high_max": 0.001906873680127319,
      "clip_ratio/high_mean": 0.0008471759992971784,
      "clip_ratio/low_mean": 0.0005641269253828796,
      "clip_ratio/low_min": 2.4888242478482425e-05,
      "clip_ratio/region_mean": 0.001411302902852185,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3258.0,
      "completions/mean_length": 644.575927734375,
      "completions/mean_terminated_length": 553.6449584960938,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 1.5597667638483965,
      "grad_norm": 0.14925526082515717,
      "learning_rate": 1e-06,
      "loss": 0.0125,
      "num_tokens": 97257418.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.22755201160907745,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.48841196298599243,
      "step": 167
    },
    {
      "clip_ratio/high_max": 0.0015691809821873903,
      "clip_ratio/high_mean": 0.0006319213207461871,
      "clip_ratio/low_mean": 0.0004532409575404017,
      "clip_ratio/low_min": 1.1886649190273602e-05,
      "clip_ratio/region_mean": 0.0010851622682821471,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3579.0,
      "completions/mean_length": 680.5089721679688,
      "completions/mean_terminated_length": 606.5131225585938,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 1.5690962099125363,
      "grad_norm": 0.12171467393636703,
      "learning_rate": 1e-06,
      "loss": -0.0016,
      "num_tokens": 97873674.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.17487628757953644,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 168
    },
    {
      "clip_ratio/high_max": 0.0019097938602499198,
      "clip_ratio/high_mean": 0.0006869558783364482,
      "clip_ratio/low_mean": 0.0005390265587266185,
      "clip_ratio/low_min": 1.3266821952129249e-05,
      "clip_ratio/region_mean": 0.001225982417963678,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3752.0,
      "completions/mean_length": 632.9319458007812,
      "completions/mean_terminated_length": 553.8663940429688,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 1.5784256559766763,
      "grad_norm": 0.14499714970588684,
      "learning_rate": 1e-06,
      "loss": 0.0114,
      "num_tokens": 98446661.0,
      "reward": 0.578125,
      "reward_std": 0.1948240101337433,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 169
    },
    {
      "clip_ratio/high_max": 0.0016660082401358522,
      "clip_ratio/high_mean": 0.000690355134793208,
      "clip_ratio/low_mean": 0.0005471073127409909,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001237462452991167,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2863.0,
      "completions/mean_length": 651.825927734375,
      "completions/mean_terminated_length": 581.2164306640625,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 1.5877551020408163,
      "grad_norm": 0.13439850509166718,
      "learning_rate": 1e-06,
      "loss": 0.0111,
      "num_tokens": 99046137.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.1863390952348709,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 170
    },
    {
      "clip_ratio/high_max": 0.0022043542776373215,
      "clip_ratio/high_mean": 0.000894695227543707,
      "clip_ratio/low_mean": 0.0007906444052423467,
      "clip_ratio/low_min": 0.00010231894702883437,
      "clip_ratio/region_mean": 0.0016853396155056544,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3812.0,
      "completions/mean_length": 648.1439819335938,
      "completions/mean_terminated_length": 573.4469604492188,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 1.5970845481049563,
      "grad_norm": 0.14859704673290253,
      "learning_rate": 1e-06,
      "loss": -0.0009,
      "num_tokens": 99640138.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.22000113129615784,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 171
    },
    {
      "clip_ratio/high_max": 0.0019844864873448387,
      "clip_ratio/high_mean": 0.0009608767031750176,
      "clip_ratio/low_mean": 0.0005933990096309572,
      "clip_ratio/low_min": 7.017723510216456e-05,
      "clip_ratio/region_mean": 0.0015542756736977026,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4076.0,
      "completions/mean_length": 684.0424194335938,
      "completions/mean_terminated_length": 594.1512451171875,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 1.6064139941690962,
      "grad_norm": 0.14803965389728546,
      "learning_rate": 1e-06,
      "loss": 0.0022,
      "num_tokens": 100250584.0,
      "reward": 0.606026828289032,
      "reward_std": 0.23868004977703094,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890191316604614,
      "step": 172
    },
    {
      "clip_ratio/high_max": 0.001996364320802968,
      "clip_ratio/high_mean": 0.0008315825980389491,
      "clip_ratio/low_mean": 0.00047165330761345103,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001303235887462506,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3660.0,
      "completions/mean_length": 670.2076416015625,
      "completions/mean_terminated_length": 551.5311889648438,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 1.6157434402332362,
      "grad_norm": 0.14180468022823334,
      "learning_rate": 1e-06,
      "loss": 0.0052,
      "num_tokens": 100808658.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.19099578261375427,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 173
    },
    {
      "clip_ratio/high_max": 0.0015758375484438147,
      "clip_ratio/high_mean": 0.0005748821095039602,
      "clip_ratio/low_mean": 0.0006755555532436119,
      "clip_ratio/low_min": 3.074549567827489e-05,
      "clip_ratio/region_mean": 0.0012504376863944344,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3215.0,
      "completions/mean_length": 641.6752319335938,
      "completions/mean_terminated_length": 550.6678466796875,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 1.6250728862973762,
      "grad_norm": 0.1388065218925476,
      "learning_rate": 1e-06,
      "loss": 0.0048,
      "num_tokens": 101379703.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.19498512148857117,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 174
    },
    {
      "clip_ratio/high_max": 0.0017157134316221345,
      "clip_ratio/high_mean": 0.0006739685177308274,
      "clip_ratio/low_mean": 0.00044784355395677267,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011218121253477875,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3381.0,
      "completions/mean_length": 663.0892944335938,
      "completions/mean_terminated_length": 576.6773071289062,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 1.634402332361516,
      "grad_norm": 0.12144182622432709,
      "learning_rate": 1e-06,
      "loss": 0.0137,
      "num_tokens": 101992087.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.18536199629306793,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 175
    },
    {
      "clip_ratio/high_max": 0.0019562089692044538,
      "clip_ratio/high_mean": 0.000716243475835654,
      "clip_ratio/low_mean": 0.0006760624455637299,
      "clip_ratio/low_min": 3.2546440706937574e-05,
      "clip_ratio/region_mean": 0.0013923059159424156,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3865.0,
      "completions/mean_length": 667.140625,
      "completions/mean_terminated_length": 596.8451538085938,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 1.643731778425656,
      "grad_norm": 0.14351120591163635,
      "learning_rate": 1e-06,
      "loss": 0.02,
      "num_tokens": 102606421.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.22338497638702393,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 176
    },
    {
      "clip_ratio/high_max": 0.0014324550465971697,
      "clip_ratio/high_mean": 0.0005830946192872943,
      "clip_ratio/low_mean": 0.00042451115496078273,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010076057878904976,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1928.0,
      "completions/mean_length": 662.0335083007812,
      "completions/mean_terminated_length": 571.5624389648438,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 1.6530612244897958,
      "grad_norm": 0.1510154753923416,
      "learning_rate": 1e-06,
      "loss": -0.0063,
      "num_tokens": 103196011.0,
      "reward": 0.559151828289032,
      "reward_std": 0.15965604782104492,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 177
    },
    {
      "clip_ratio/high_max": 0.0016313164414896164,
      "clip_ratio/high_mean": 0.0006895035476190969,
      "clip_ratio/low_mean": 0.00042845612392738985,
      "clip_ratio/low_min": 5.960425278317416e-05,
      "clip_ratio/region_mean": 0.0011179596513102297,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3988.0,
      "completions/mean_length": 668.107177734375,
      "completions/mean_terminated_length": 557.5299682617188,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 1.6623906705539357,
      "grad_norm": 0.13701440393924713,
      "learning_rate": 1e-06,
      "loss": 0.0011,
      "num_tokens": 103763403.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.18814215064048767,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 178
    },
    {
      "clip_ratio/high_max": 0.0016562871787755284,
      "clip_ratio/high_mean": 0.0006508016022053198,
      "clip_ratio/low_mean": 0.0005484965413415921,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011992981671937741,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4022.0,
      "completions/mean_length": 604.0223388671875,
      "completions/mean_terminated_length": 552.6115112304688,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 1.6717201166180757,
      "grad_norm": 0.13368184864521027,
      "learning_rate": 1e-06,
      "loss": 0.0106,
      "num_tokens": 104333543.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.18080882728099823,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 179
    },
    {
      "clip_ratio/high_max": 0.0015244390233419836,
      "clip_ratio/high_mean": 0.0006030370268490515,
      "clip_ratio/low_mean": 0.0004817474000446964,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010847844405361684,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3576.0,
      "completions/mean_length": 622.0826416015625,
      "completions/mean_terminated_length": 534.638427734375,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 1.6810495626822157,
      "grad_norm": 0.13400602340698242,
      "learning_rate": 1e-06,
      "loss": -0.0059,
      "num_tokens": 104887721.0,
      "reward": 0.5859375,
      "reward_std": 0.1712280660867691,
      "rewards/verify_math_reward/mean": 0.5859375,
      "rewards/verify_math_reward/std": 0.4928344786167145,
      "step": 180
    },
    {
      "clip_ratio/high_max": 0.0018641551541804802,
      "clip_ratio/high_mean": 0.0006986685366427992,
      "clip_ratio/low_mean": 0.0005265117324597668,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012251802727405448,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2528.0,
      "completions/mean_length": 634.6361694335938,
      "completions/mean_terminated_length": 551.5634155273438,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 1.6903790087463557,
      "grad_norm": 0.14882251620292664,
      "learning_rate": 1e-06,
      "loss": -0.0071,
      "num_tokens": 105453187.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.17780296504497528,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 181
    },
    {
      "clip_ratio/high_max": 0.0018991185024788138,
      "clip_ratio/high_mean": 0.0006759333355148556,
      "clip_ratio/low_mean": 0.0006143502687336877,
      "clip_ratio/low_min": 9.475439583184198e-06,
      "clip_ratio/region_mean": 0.0012902836278954055,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3800.0,
      "completions/mean_length": 758.9174194335938,
      "completions/mean_terminated_length": 631.3117065429688,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 1.6997084548104957,
      "grad_norm": 0.1366676241159439,
      "learning_rate": 1e-06,
      "loss": 0.0184,
      "num_tokens": 106102513.0,
      "reward": 0.512276828289032,
      "reward_std": 0.18652385473251343,
      "rewards/verify_math_reward/mean": 0.5122767686843872,
      "rewards/verify_math_reward/std": 0.500128448009491,
      "step": 182
    },
    {
      "clip_ratio/high_max": 0.0019408968910283875,
      "clip_ratio/high_mean": 0.0007383292740996694,
      "clip_ratio/low_mean": 0.0005783888327641762,
      "clip_ratio/low_min": 1.0807539183588233e-05,
      "clip_ratio/region_mean": 0.001316718124144245,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3244.0,
      "completions/mean_length": 581.9732666015625,
      "completions/mean_terminated_length": 509.9316711425781,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 1.7090379008746357,
      "grad_norm": 0.1475510448217392,
      "learning_rate": 1e-06,
      "loss": 0.0112,
      "num_tokens": 106646009.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.19053933024406433,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 183
    },
    {
      "clip_ratio/high_max": 0.002301610446011182,
      "clip_ratio/high_mean": 0.0007948559814394685,
      "clip_ratio/low_mean": 0.0005266468115223688,
      "clip_ratio/low_min": 1.1428049219830427e-05,
      "clip_ratio/region_mean": 0.0013215027829573955,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4092.0,
      "completions/mean_length": 694.8717041015625,
      "completions/mean_terminated_length": 605.2658081054688,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 1.7183673469387755,
      "grad_norm": 0.1307675987482071,
      "learning_rate": 1e-06,
      "loss": 0.0055,
      "num_tokens": 107267254.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.17975644767284393,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 184
    },
    {
      "clip_ratio/high_max": 0.0016229440116148908,
      "clip_ratio/high_mean": 0.0005987291697238106,
      "clip_ratio/low_mean": 0.0004522435929175117,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00105097275445587,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3853.0,
      "completions/mean_length": 699.0636596679688,
      "completions/mean_terminated_length": 589.4850463867188,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 1.7276967930029155,
      "grad_norm": 0.1379019021987915,
      "learning_rate": 1e-06,
      "loss": 0.0122,
      "num_tokens": 107864711.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.17472361028194427,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 185
    },
    {
      "clip_ratio/high_max": 0.0015986760408850387,
      "clip_ratio/high_mean": 0.0006530470236612018,
      "clip_ratio/low_mean": 0.0005655967963775765,
      "clip_ratio/low_min": 2.2465852453024127e-05,
      "clip_ratio/region_mean": 0.0012186437888885848,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3619.0,
      "completions/mean_length": 660.578125,
      "completions/mean_terminated_length": 578.1279907226562,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 1.7370262390670554,
      "grad_norm": 0.13076432049274445,
      "learning_rate": 1e-06,
      "loss": 0.0006,
      "num_tokens": 108462421.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.19260655343532562,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 186
    },
    {
      "clip_ratio/high_max": 0.001723245884932112,
      "clip_ratio/high_mean": 0.0006323671332211234,
      "clip_ratio/low_mean": 0.0005192197158976342,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011515868463902734,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2299.0,
      "completions/mean_length": 605.864990234375,
      "completions/mean_terminated_length": 530.251953125,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 1.7463556851311952,
      "grad_norm": 0.13046713173389435,
      "learning_rate": 1e-06,
      "loss": -0.012,
      "num_tokens": 109015196.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.15146467089653015,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 187
    },
    {
      "clip_ratio/high_max": 0.0014721749957971042,
      "clip_ratio/high_mean": 0.0006529896340907726,
      "clip_ratio/low_mean": 0.0005742745020143047,
      "clip_ratio/low_min": 2.578914791229181e-05,
      "clip_ratio/region_mean": 0.0012272641288291197,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2710.0,
      "completions/mean_length": 713.8125610351562,
      "completions/mean_terminated_length": 608.727294921875,
      "completions/min_length": 177.0,
      "completions/min_terminated_length": 177.0,
      "epoch": 1.7556851311953352,
      "grad_norm": 0.13606221973896027,
      "learning_rate": 1e-06,
      "loss": -0.0024,
      "num_tokens": 109635196.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.18332546949386597,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 188
    },
    {
      "clip_ratio/high_max": 0.0022245493601076305,
      "clip_ratio/high_mean": 0.0008970431299530901,
      "clip_ratio/low_mean": 0.0006182360411912668,
      "clip_ratio/low_min": 1.0991910130542237e-05,
      "clip_ratio/region_mean": 0.0015152791602304205,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3470.0,
      "completions/mean_length": 647.6417846679688,
      "completions/mean_terminated_length": 576.9464721679688,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 1.7650145772594752,
      "grad_norm": 0.15010227262973785,
      "learning_rate": 1e-06,
      "loss": 0.0162,
      "num_tokens": 110238443.0,
      "reward": 0.59375,
      "reward_std": 0.19418713450431824,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 189
    },
    {
      "clip_ratio/high_max": 0.001977579278900521,
      "clip_ratio/high_mean": 0.0007247905523399822,
      "clip_ratio/low_mean": 0.0005474706013046671,
      "clip_ratio/low_min": 2.3674318981647957e-05,
      "clip_ratio/region_mean": 0.0012722611572826281,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4022.0,
      "completions/mean_length": 741.0201416015625,
      "completions/mean_terminated_length": 616.7615966796875,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 1.7743440233236152,
      "grad_norm": 0.12722791731357574,
      "learning_rate": 1e-06,
      "loss": -0.0104,
      "num_tokens": 110856109.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.19825245440006256,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 190
    },
    {
      "clip_ratio/high_max": 0.0017347467874060385,
      "clip_ratio/high_mean": 0.0006408966291928664,
      "clip_ratio/low_mean": 0.000596689736994449,
      "clip_ratio/low_min": 4.890091440756805e-05,
      "clip_ratio/region_mean": 0.0012375863698252942,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2890.0,
      "completions/mean_length": 638.685302734375,
      "completions/mean_terminated_length": 539.451171875,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 1.7836734693877552,
      "grad_norm": 0.13227508962154388,
      "learning_rate": 1e-06,
      "loss": -0.0021,
      "num_tokens": 111416323.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.15597616136074066,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 191
    },
    {
      "clip_ratio/high_max": 0.0022329735547828022,
      "clip_ratio/high_mean": 0.0008203545858123107,
      "clip_ratio/low_mean": 0.0005361180983527447,
      "clip_ratio/low_min": 1.538272226753179e-05,
      "clip_ratio/region_mean": 0.0013564726687036455,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2336.0,
      "completions/mean_length": 650.591552734375,
      "completions/mean_terminated_length": 547.6253051757812,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 1.7930029154518952,
      "grad_norm": 0.14059096574783325,
      "learning_rate": 1e-06,
      "loss": -0.0052,
      "num_tokens": 111976405.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.1956181675195694,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263837933540344,
      "step": 192
    },
    {
      "clip_ratio/high_max": 0.0020801638493139762,
      "clip_ratio/high_mean": 0.0007264574742293917,
      "clip_ratio/low_mean": 0.0004943578360325773,
      "clip_ratio/low_min": 2.9367339266173076e-05,
      "clip_ratio/region_mean": 0.0012208152911625803,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3845.0,
      "completions/mean_length": 735.6205444335938,
      "completions/mean_terminated_length": 615.1907348632812,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 1.802332361516035,
      "grad_norm": 0.12742440402507782,
      "learning_rate": 1e-06,
      "loss": -0.0054,
      "num_tokens": 112601041.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.16578517854213715,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 193
    },
    {
      "clip_ratio/high_max": 0.0020587184917530976,
      "clip_ratio/high_mean": 0.0008120808706735261,
      "clip_ratio/low_mean": 0.0005044134632044006,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013164943629817571,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2822.0,
      "completions/mean_length": 635.114990234375,
      "completions/mean_terminated_length": 527.5845947265625,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 1.811661807580175,
      "grad_norm": 0.13608218729496002,
      "learning_rate": 1e-06,
      "loss": -0.0228,
      "num_tokens": 113144968.0,
      "reward": 0.613839328289032,
      "reward_std": 0.17926861345767975,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 194
    },
    {
      "clip_ratio/high_max": 0.0019229853060096502,
      "clip_ratio/high_mean": 0.0007687071210966678,
      "clip_ratio/low_mean": 0.0005006855881219963,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001269392749236431,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3543.0,
      "completions/mean_length": 658.7176513671875,
      "completions/mean_terminated_length": 555.9942626953125,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 1.820991253644315,
      "grad_norm": 0.1397802084684372,
      "learning_rate": 1e-06,
      "loss": -0.0028,
      "num_tokens": 113713067.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.1881396323442459,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 195
    },
    {
      "clip_ratio/high_max": 0.0014796245559409726,
      "clip_ratio/high_mean": 0.0005814004280182417,
      "clip_ratio/low_mean": 0.0004082226628270291,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009896231149468804,
      "completions/clipped_ratio": 0.0323660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3957.0,
      "completions/mean_length": 698.755615234375,
      "completions/mean_terminated_length": 585.1222534179688,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 1.8303206997084547,
      "grad_norm": 0.11784183979034424,
      "learning_rate": 1e-06,
      "loss": -0.0068,
      "num_tokens": 114303240.0,
      "reward": 0.637276828289032,
      "reward_std": 0.14492550492286682,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 196
    },
    {
      "clip_ratio/high_max": 0.001966052717762068,
      "clip_ratio/high_mean": 0.0006731036294240766,
      "clip_ratio/low_mean": 0.0005578692271228647,
      "clip_ratio/low_min": 1.4256386748456862e-05,
      "clip_ratio/region_mean": 0.001230972873599967,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2341.0,
      "completions/mean_length": 730.2377319335938,
      "completions/mean_terminated_length": 625.662841796875,
      "completions/min_length": 181.0,
      "completions/min_terminated_length": 181.0,
      "epoch": 1.8396501457725947,
      "grad_norm": 0.14265641570091248,
      "learning_rate": 1e-06,
      "loss": 0.0031,
      "num_tokens": 114947749.0,
      "reward": 0.4654017984867096,
      "reward_std": 0.2022992968559265,
      "rewards/verify_math_reward/mean": 0.4654017984867096,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 197
    },
    {
      "clip_ratio/high_max": 0.0017364472514600493,
      "clip_ratio/high_mean": 0.0004902536475128727,
      "clip_ratio/low_mean": 0.0005718366019209498,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010620902430673596,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3200.0,
      "completions/mean_length": 669.8772583007812,
      "completions/mean_terminated_length": 559.3571166992188,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 1.8489795918367347,
      "grad_norm": 0.1364136040210724,
      "learning_rate": 1e-06,
      "loss": -0.0024,
      "num_tokens": 115528807.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.15800705552101135,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 198
    },
    {
      "clip_ratio/high_max": 0.002096293719660025,
      "clip_ratio/high_mean": 0.0007575220151920803,
      "clip_ratio/low_mean": 0.0005292972218740033,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001286819257074967,
      "completions/clipped_ratio": 0.0323660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3367.0,
      "completions/mean_length": 654.5960083007812,
      "completions/mean_terminated_length": 539.4855346679688,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 1.8583090379008746,
      "grad_norm": 0.14795687794685364,
      "learning_rate": 1e-06,
      "loss": -0.0018,
      "num_tokens": 116077109.0,
      "reward": 0.590401828289032,
      "reward_std": 0.17405030131340027,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 199
    },
    {
      "clip_ratio/high_max": 0.0022372160965460353,
      "clip_ratio/high_mean": 0.0008441779791610315,
      "clip_ratio/low_mean": 0.00047425334059880697,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013184313211240806,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3538.0,
      "completions/mean_length": 693.1585083007812,
      "completions/mean_terminated_length": 587.4315185546875,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 1.8676384839650146,
      "grad_norm": 0.1414903700351715,
      "learning_rate": 1e-06,
      "loss": -0.0079,
      "num_tokens": 116678979.0,
      "reward": 0.645089328289032,
      "reward_std": 0.18137337267398834,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 200
    },
    {
      "clip_ratio/high_max": 0.002172154694562778,
      "clip_ratio/high_mean": 0.0008787740134721389,
      "clip_ratio/low_mean": 0.0006796053594371188,
      "clip_ratio/low_min": 6.416153973987093e-05,
      "clip_ratio/region_mean": 0.001558379353809869,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2580.0,
      "completions/mean_length": 621.599365234375,
      "completions/mean_terminated_length": 550.3701782226562,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 1.8769679300291546,
      "grad_norm": 0.15065741539001465,
      "learning_rate": 1e-06,
      "loss": 0.0035,
      "num_tokens": 117242820.0,
      "reward": 0.590401828289032,
      "reward_std": 0.22236761450767517,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 201
    },
    {
      "clip_ratio/high_max": 0.001663742968958104,
      "clip_ratio/high_mean": 0.0007463917900167871,
      "clip_ratio/low_mean": 0.0005143352955201408,
      "clip_ratio/low_min": 1.0354539881518576e-05,
      "clip_ratio/region_mean": 0.0012607270437001716,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3838.0,
      "completions/mean_length": 722.099365234375,
      "completions/mean_terminated_length": 605.2205200195312,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 1.8862973760932946,
      "grad_norm": 0.1430848240852356,
      "learning_rate": 1e-06,
      "loss": -0.0065,
      "num_tokens": 117847021.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.19399844110012054,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321895837783813,
      "step": 202
    },
    {
      "clip_ratio/high_max": 0.002206108252721606,
      "clip_ratio/high_mean": 0.0007869080991440569,
      "clip_ratio/low_mean": 0.00048120848441612907,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012681166044785641,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3943.0,
      "completions/mean_length": 683.8627319335938,
      "completions/mean_terminated_length": 577.8469848632812,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 1.8956268221574344,
      "grad_norm": 0.14681637287139893,
      "learning_rate": 1e-06,
      "loss": -0.0017,
      "num_tokens": 118440962.0,
      "reward": 0.543526828289032,
      "reward_std": 0.18483206629753113,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 203
    },
    {
      "clip_ratio/high_max": 0.002019617433688836,
      "clip_ratio/high_mean": 0.0007471297667507315,
      "clip_ratio/low_mean": 0.0005144346114320797,
      "clip_ratio/low_min": 1.2245297511981335e-05,
      "clip_ratio/region_mean": 0.0012615643536264542,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3194.0,
      "completions/mean_length": 711.247802734375,
      "completions/mean_terminated_length": 589.9445190429688,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 1.9049562682215744,
      "grad_norm": 0.15287570655345917,
      "learning_rate": 1e-06,
      "loss": -0.0109,
      "num_tokens": 119048216.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.19024580717086792,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 204
    },
    {
      "clip_ratio/high_max": 0.0020018596915178932,
      "clip_ratio/high_mean": 0.000648028108116705,
      "clip_ratio/low_mean": 0.0004865585397055838,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001134586625994416,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1974.0,
      "completions/mean_length": 692.755615234375,
      "completions/mean_terminated_length": 574.8602905273438,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 1.9142857142857141,
      "grad_norm": 0.13454777002334595,
      "learning_rate": 1e-06,
      "loss": 0.0072,
      "num_tokens": 119631861.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.16961295902729034,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 205
    },
    {
      "clip_ratio/high_max": 0.0017946812877198681,
      "clip_ratio/high_mean": 0.0006605059361390886,
      "clip_ratio/low_mean": 0.0004883128474375553,
      "clip_ratio/low_min": 1.0580666639725678e-05,
      "clip_ratio/region_mean": 0.0011488187810755335,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2389.0,
      "completions/mean_length": 734.0592041015625,
      "completions/mean_terminated_length": 633.5873413085938,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 1.9236151603498541,
      "grad_norm": 0.12182007730007172,
      "learning_rate": 1e-06,
      "loss": -0.0096,
      "num_tokens": 120262706.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.17675015330314636,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 206
    },
    {
      "clip_ratio/high_max": 0.0017613017371331807,
      "clip_ratio/high_mean": 0.000660313854496053,
      "clip_ratio/low_mean": 0.00043818357789859874,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010984974178427365,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4049.0,
      "completions/mean_length": 678.669677734375,
      "completions/mean_terminated_length": 588.6369018554688,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 1.9329446064139941,
      "grad_norm": 0.14148029685020447,
      "learning_rate": 1e-06,
      "loss": -0.0001,
      "num_tokens": 120870034.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.15826597809791565,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 207
    },
    {
      "clip_ratio/high_max": 0.0016052682476583868,
      "clip_ratio/high_mean": 0.0007449024196830578,
      "clip_ratio/low_mean": 0.0006256066963032936,
      "clip_ratio/low_min": 6.049065268598497e-05,
      "clip_ratio/region_mean": 0.0013705091005249415,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3599.0,
      "completions/mean_length": 695.5413208007812,
      "completions/mean_terminated_length": 633.7147827148438,
      "completions/min_length": 179.0,
      "completions/min_terminated_length": 179.0,
      "epoch": 1.9422740524781341,
      "grad_norm": 0.13558965921401978,
      "learning_rate": 1e-06,
      "loss": 0.0108,
      "num_tokens": 121518695.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.2175946682691574,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715369939804077,
      "step": 208
    },
    {
      "clip_ratio/high_max": 0.001782178060238948,
      "clip_ratio/high_mean": 0.0006993906808929751,
      "clip_ratio/low_mean": 0.0005009642336517572,
      "clip_ratio/low_min": 3.2224801543634385e-05,
      "clip_ratio/region_mean": 0.0012003549163637217,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1833.0,
      "completions/mean_length": 579.3560791015625,
      "completions/mean_terminated_length": 515.4170532226562,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 1.951603498542274,
      "grad_norm": 0.14529386162757874,
      "learning_rate": 1e-06,
      "loss": 0.0072,
      "num_tokens": 122057486.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.1657840609550476,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422144770622253,
      "step": 209
    },
    {
      "clip_ratio/high_max": 0.0017631303999223746,
      "clip_ratio/high_mean": 0.0007128642464522272,
      "clip_ratio/low_mean": 0.0004985708683307166,
      "clip_ratio/low_min": 1.0222440323559567e-05,
      "clip_ratio/region_mean": 0.0012114351120544598,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3927.0,
      "completions/mean_length": 645.0803833007812,
      "completions/mean_terminated_length": 546.0298461914062,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 1.960932944606414,
      "grad_norm": 0.15338926017284393,
      "learning_rate": 1e-06,
      "loss": 0.0038,
      "num_tokens": 122615702.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.17303253710269928,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 210
    },
    {
      "clip_ratio/high_max": 0.0017781652713892981,
      "clip_ratio/high_mean": 0.0007072143907862483,
      "clip_ratio/low_mean": 0.00048664995665603783,
      "clip_ratio/low_min": 2.6575239644444082e-05,
      "clip_ratio/region_mean": 0.0011938643765461165,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1985.0,
      "completions/mean_length": 607.3850708007812,
      "completions/mean_terminated_length": 560.0283203125,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 1.970262390670554,
      "grad_norm": 0.1385481059551239,
      "learning_rate": 1e-06,
      "loss": 0.0008,
      "num_tokens": 123200687.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.1695060133934021,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644899368286,
      "step": 211
    },
    {
      "clip_ratio/high_max": 0.002027250469836872,
      "clip_ratio/high_mean": 0.0007511444637202658,
      "clip_ratio/low_mean": 0.0005570881203311728,
      "clip_ratio/low_min": 1.2005378266621847e-05,
      "clip_ratio/region_mean": 0.001308232585870428,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2055.0,
      "completions/mean_length": 612.4085083007812,
      "completions/mean_terminated_length": 524.7208251953125,
      "completions/min_length": 180.0,
      "completions/min_terminated_length": 180.0,
      "epoch": 1.9795918367346939,
      "grad_norm": 0.14795175194740295,
      "learning_rate": 1e-06,
      "loss": -0.0012,
      "num_tokens": 123743829.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.17059119045734406,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159480571747,
      "step": 212
    },
    {
      "clip_ratio/high_max": 0.001795672404114157,
      "clip_ratio/high_mean": 0.0006457516119553475,
      "clip_ratio/low_mean": 0.0005550283021875657,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012007799123239238,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2018.0,
      "completions/mean_length": 581.1897583007812,
      "completions/mean_terminated_length": 525.3991088867188,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 1.9889212827988338,
      "grad_norm": 0.15848539769649506,
      "learning_rate": 1e-06,
      "loss": 0.0096,
      "num_tokens": 124299983.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.16484861075878143,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 213
    },
    {
      "clip_ratio/high_max": 0.0015777293083374389,
      "clip_ratio/high_mean": 0.000572459652175894,
      "clip_ratio/low_mean": 0.0003918970678569167,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009643567136663478,
      "completions/clipped_ratio": 0.02840909090909094,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3519.0,
      "completions/mean_length": 690.633544921875,
      "completions/mean_terminated_length": 591.0614013671875,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 1.9982507288629736,
      "grad_norm": 0.1399824172258377,
      "learning_rate": 1e-06,
      "loss": -0.0084,
      "num_tokens": 124888491.0,
      "reward": 0.613839328289032,
      "reward_std": 0.16029614210128784,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 214
    },
    {
      "clip_ratio/high_max": 0.0019608186412369832,
      "clip_ratio/high_mean": 0.0006975977848924231,
      "clip_ratio/low_mean": 0.0006009238823025953,
      "clip_ratio/low_min": 1.3174536434235051e-05,
      "clip_ratio/region_mean": 0.0012985216344532091,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3672.0,
      "completions/mean_length": 636.1317138671875,
      "completions/mean_terminated_length": 561.1744384765625,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 2.00932944606414,
      "grad_norm": 0.14438770711421967,
      "learning_rate": 1e-06,
      "loss": -0.0137,
      "num_tokens": 125469129.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.17990799248218536,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 215
    },
    {
      "clip_ratio/high_max": 0.001747039375914028,
      "clip_ratio/high_mean": 0.0007047757480904693,
      "clip_ratio/low_mean": 0.00044305246865405934,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001147828215835034,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3625.0,
      "completions/mean_length": 692.1038208007812,
      "completions/mean_terminated_length": 582.3007202148438,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 2.01865889212828,
      "grad_norm": 0.13369223475456238,
      "learning_rate": 1e-06,
      "loss": -0.0044,
      "num_tokens": 126064246.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.15488240122795105,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.4907552897930145,
      "step": 216
    },
    {
      "clip_ratio/high_max": 0.0019376089476281777,
      "clip_ratio/high_mean": 0.0007361176521953894,
      "clip_ratio/low_mean": 0.0005074308755865786,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012435485332389362,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2339.0,
      "completions/mean_length": 646.5881958007812,
      "completions/mean_terminated_length": 543.5023193359375,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 2.02798833819242,
      "grad_norm": 0.15771497786045074,
      "learning_rate": 1e-06,
      "loss": -0.0062,
      "num_tokens": 126630789.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.16668446362018585,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 217
    },
    {
      "clip_ratio/high_max": 0.0020025025005452335,
      "clip_ratio/high_mean": 0.0007051961019897135,
      "clip_ratio/low_mean": 0.0005879925811314024,
      "clip_ratio/low_min": 2.1136287614353932e-05,
      "clip_ratio/region_mean": 0.0012931887067679781,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3835.0,
      "completions/mean_length": 693.0111694335938,
      "completions/mean_terminated_length": 627.19677734375,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 2.03731778425656,
      "grad_norm": 0.1345938891172409,
      "learning_rate": 1e-06,
      "loss": -0.0044,
      "num_tokens": 127273407.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.19032247364521027,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 218
    },
    {
      "clip_ratio/high_max": 0.002085612293740269,
      "clip_ratio/high_mean": 0.0007509668284910731,
      "clip_ratio/low_mean": 0.0005286602381602279,
      "clip_ratio/low_min": 2.0826391846640036e-05,
      "clip_ratio/region_mean": 0.0012796270530088805,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4026.0,
      "completions/mean_length": 596.810302734375,
      "completions/mean_terminated_length": 512.8297119140625,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 2.0466472303206995,
      "grad_norm": 0.16107362508773804,
      "learning_rate": 1e-06,
      "loss": 0.0131,
      "num_tokens": 127809477.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.16612105071544647,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 219
    },
    {
      "clip_ratio/high_max": 0.0017594212222320493,
      "clip_ratio/high_mean": 0.0007006275009189267,
      "clip_ratio/low_mean": 0.0006378696471074363,
      "clip_ratio/low_min": 9.72308680502465e-06,
      "clip_ratio/region_mean": 0.0013384971534833312,
      "completions/clipped_ratio": 0.0323660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2498.0,
      "completions/mean_length": 711.802490234375,
      "completions/mean_terminated_length": 598.6055297851562,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 2.0559766763848395,
      "grad_norm": 0.15395912528038025,
      "learning_rate": 1e-06,
      "loss": -0.021,
      "num_tokens": 128423004.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.18844525516033173,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 220
    },
    {
      "clip_ratio/high_max": 0.0016991322499961825,
      "clip_ratio/high_mean": 0.0006614746425839257,
      "clip_ratio/low_mean": 0.00036323781841929303,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010247124973830068,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1999.0,
      "completions/mean_length": 650.1261596679688,
      "completions/mean_terminated_length": 559.3413696289062,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 2.0653061224489795,
      "grad_norm": 0.1293102353811264,
      "learning_rate": 1e-06,
      "loss": 0.006,
      "num_tokens": 128999221.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.1566508710384369,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791125416755676,
      "step": 221
    },
    {
      "clip_ratio/high_max": 0.001932140989083564,
      "clip_ratio/high_mean": 0.0006858597153041046,
      "clip_ratio/low_mean": 0.0005068721893621841,
      "clip_ratio/low_min": 1.9853874619002454e-05,
      "clip_ratio/region_mean": 0.0011927319246751722,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3364.0,
      "completions/mean_length": 701.7120971679688,
      "completions/mean_terminated_length": 596.2508544921875,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 2.0746355685131195,
      "grad_norm": 0.14621222019195557,
      "learning_rate": 1e-06,
      "loss": -0.0102,
      "num_tokens": 129609011.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.1595044881105423,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 222
    },
    {
      "clip_ratio/high_max": 0.0016444951834273525,
      "clip_ratio/high_mean": 0.0005902672855881974,
      "clip_ratio/low_mean": 0.0004461511125555262,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010364184017817024,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3674.0,
      "completions/mean_length": 612.3660888671875,
      "completions/mean_terminated_length": 532.8310546875,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 2.0839650145772595,
      "grad_norm": 0.12587220966815948,
      "learning_rate": 1e-06,
      "loss": -0.0072,
      "num_tokens": 130176891.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.13034509122371674,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 223
    },
    {
      "clip_ratio/high_max": 0.001594408957316773,
      "clip_ratio/high_mean": 0.0005833374389112578,
      "clip_ratio/low_mean": 0.000571088754441007,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011544262270035688,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3728.0,
      "completions/mean_length": 706.3717041015625,
      "completions/mean_terminated_length": 576.7566528320312,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 2.0932944606413995,
      "grad_norm": 0.13816197216510773,
      "learning_rate": 1e-06,
      "loss": 0.0006,
      "num_tokens": 130760336.0,
      "reward": 0.625,
      "reward_std": 0.16093555092811584,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 224
    },
    {
      "clip_ratio/high_max": 0.0017887444228108507,
      "clip_ratio/high_mean": 0.0007015915643933113,
      "clip_ratio/low_mean": 0.0005311268314471818,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012327184194873553,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3030.0,
      "completions/mean_length": 711.6328735351562,
      "completions/mean_terminated_length": 602.4596557617188,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 2.1026239067055394,
      "grad_norm": 0.13910657167434692,
      "learning_rate": 1e-06,
      "loss": -0.0021,
      "num_tokens": 131381679.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.17585085332393646,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 225
    },
    {
      "clip_ratio/high_max": 0.001888532075099647,
      "clip_ratio/high_mean": 0.0008734535094845342,
      "clip_ratio/low_mean": 0.0006097136374592083,
      "clip_ratio/low_min": 2.069764923362527e-05,
      "clip_ratio/region_mean": 0.0014831671505817212,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3364.0,
      "completions/mean_length": 716.9085083007812,
      "completions/mean_terminated_length": 611.91943359375,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 2.1119533527696794,
      "grad_norm": 0.14524471759796143,
      "learning_rate": 1e-06,
      "loss": 0.0039,
      "num_tokens": 132005285.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.20140846073627472,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 226
    },
    {
      "clip_ratio/high_max": 0.0017761674280336592,
      "clip_ratio/high_mean": 0.0006098909088905202,
      "clip_ratio/low_mean": 0.0006259718484216137,
      "clip_ratio/low_min": 1.4344732335302979e-05,
      "clip_ratio/region_mean": 0.001235862753674155,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3936.0,
      "completions/mean_length": 646.9252319335938,
      "completions/mean_terminated_length": 564.1473999023438,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 2.1212827988338194,
      "grad_norm": 0.1339639276266098,
      "learning_rate": 1e-06,
      "loss": 0.0029,
      "num_tokens": 132581666.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.16766269505023956,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 227
    },
    {
      "clip_ratio/high_max": 0.001584508914675098,
      "clip_ratio/high_mean": 0.0007411727146973135,
      "clip_ratio/low_mean": 0.0006377400495694019,
      "clip_ratio/low_min": 1.3861166735296138e-05,
      "clip_ratio/region_mean": 0.0013789127515337896,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3111.0,
      "completions/mean_length": 714.2589721679688,
      "completions/mean_terminated_length": 584.9454956054688,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 2.130612244897959,
      "grad_norm": 0.16043105721473694,
      "learning_rate": 1e-06,
      "loss": 0.0152,
      "num_tokens": 133174882.0,
      "reward": 0.598214328289032,
      "reward_std": 0.20316554605960846,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 228
    },
    {
      "clip_ratio/high_max": 0.00202133289349149,
      "clip_ratio/high_mean": 0.0007314009126275778,
      "clip_ratio/low_mean": 0.000528357786606648,
      "clip_ratio/low_min": 2.1324311092030257e-05,
      "clip_ratio/region_mean": 0.0012597587556228973,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3621.0,
      "completions/mean_length": 733.8069458007812,
      "completions/mean_terminated_length": 629.3429565429688,
      "completions/min_length": 187.0,
      "completions/min_terminated_length": 187.0,
      "epoch": 2.139941690962099,
      "grad_norm": 0.1381755769252777,
      "learning_rate": 1e-06,
      "loss": -0.0069,
      "num_tokens": 133816717.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.18257686495780945,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 229
    },
    {
      "clip_ratio/high_max": 0.0017229677687282674,
      "clip_ratio/high_mean": 0.000659604680549819,
      "clip_ratio/low_mean": 0.0007401865786960116,
      "clip_ratio/low_min": 8.758067815506365e-05,
      "clip_ratio/region_mean": 0.0013997912683407776,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3955.0,
      "completions/mean_length": 637.4967041015625,
      "completions/mean_terminated_length": 542.3084716796875,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 2.149271137026239,
      "grad_norm": 0.1531965136528015,
      "learning_rate": 1e-06,
      "loss": -0.0032,
      "num_tokens": 134375202.0,
      "reward": 0.613839328289032,
      "reward_std": 0.2004224956035614,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 230
    },
    {
      "clip_ratio/high_max": 0.001587845054018544,
      "clip_ratio/high_mean": 0.0006205899171618512,
      "clip_ratio/low_mean": 0.00047607856777176494,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010966684676532168,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3870.0,
      "completions/mean_length": 658.1551513671875,
      "completions/mean_terminated_length": 547.2568969726562,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 2.158600583090379,
      "grad_norm": 0.2171899527311325,
      "learning_rate": 1e-06,
      "loss": 0.0093,
      "num_tokens": 134939957.0,
      "reward": 0.590401828289032,
      "reward_std": 0.1589820384979248,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 231
    },
    {
      "clip_ratio/high_max": 0.0015843191467865836,
      "clip_ratio/high_mean": 0.000552942197828088,
      "clip_ratio/low_mean": 0.0005816124626107921,
      "clip_ratio/low_min": 7.835025826352648e-06,
      "clip_ratio/region_mean": 0.0011345546663505957,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3939.0,
      "completions/mean_length": 733.8939819335938,
      "completions/mean_terminated_length": 601.2818603515625,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 2.167930029154519,
      "grad_norm": 0.14793087542057037,
      "learning_rate": 1e-06,
      "loss": 0.0093,
      "num_tokens": 135548638.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.16735707223415375,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 232
    },
    {
      "clip_ratio/high_max": 0.0018679627937672194,
      "clip_ratio/high_mean": 0.0006411258782463847,
      "clip_ratio/low_mean": 0.0006753035140718566,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00131642939595622,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3750.0,
      "completions/mean_length": 691.0435791015625,
      "completions/mean_terminated_length": 609.3245849609375,
      "completions/min_length": 40.0,
      "completions/min_terminated_length": 40.0,
      "epoch": 2.177259475218659,
      "grad_norm": 0.14007490873336792,
      "learning_rate": 1e-06,
      "loss": 0.0073,
      "num_tokens": 136173061.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.1868622601032257,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 233
    },
    {
      "clip_ratio/high_max": 0.0016494242208864307,
      "clip_ratio/high_mean": 0.0006789761091567925,
      "clip_ratio/low_mean": 0.0006994724626565585,
      "clip_ratio/low_min": 3.795822976826457e-05,
      "clip_ratio/region_mean": 0.0013784485818177927,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2673.0,
      "completions/mean_length": 596.630615234375,
      "completions/mean_terminated_length": 537.0499877929688,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 2.186588921282799,
      "grad_norm": 0.15200549364089966,
      "learning_rate": 1e-06,
      "loss": 0.0045,
      "num_tokens": 136753466.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.19666621088981628,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 234
    },
    {
      "clip_ratio/high_max": 0.002031806063314434,
      "clip_ratio/high_mean": 0.0008012997132027522,
      "clip_ratio/low_mean": 0.0005328882953108405,
      "clip_ratio/low_min": 1.6391293684137054e-05,
      "clip_ratio/region_mean": 0.0013341879821382463,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1817.0,
      "completions/mean_length": 625.3069458007812,
      "completions/mean_terminated_length": 525.6888427734375,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 2.195918367346939,
      "grad_norm": 0.16664724051952362,
      "learning_rate": 1e-06,
      "loss": 0.0064,
      "num_tokens": 137298637.0,
      "reward": 0.621651828289032,
      "reward_std": 0.195388525724411,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 235
    },
    {
      "clip_ratio/high_max": 0.001413239078829065,
      "clip_ratio/high_mean": 0.0005412902673924691,
      "clip_ratio/low_mean": 0.00039417503830918577,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009354653302580118,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3812.0,
      "completions/mean_length": 635.8214721679688,
      "completions/mean_terminated_length": 540.587158203125,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 2.205247813411079,
      "grad_norm": 0.1374477744102478,
      "learning_rate": 1e-06,
      "loss": -0.0058,
      "num_tokens": 137859461.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.1322651207447052,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975653409957886,
      "step": 236
    },
    {
      "clip_ratio/high_max": 0.0020341953641036525,
      "clip_ratio/high_mean": 0.000699316549798823,
      "clip_ratio/low_mean": 0.000633741795354581,
      "clip_ratio/low_min": 3.271380228397902e-05,
      "clip_ratio/region_mean": 0.0013330583642527927,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2705.0,
      "completions/mean_length": 654.0625,
      "completions/mean_terminated_length": 555.269775390625,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 2.2145772594752184,
      "grad_norm": 0.15545177459716797,
      "learning_rate": 1e-06,
      "loss": -0.0037,
      "num_tokens": 138426509.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.19335976243019104,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 237
    },
    {
      "clip_ratio/high_max": 0.0016926105454331264,
      "clip_ratio/high_mean": 0.0006320074353425298,
      "clip_ratio/low_mean": 0.0005650783195960685,
      "clip_ratio/low_min": 1.4156285033095628e-05,
      "clip_ratio/region_mean": 0.0011970857522101142,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3088.0,
      "completions/mean_length": 656.4308471679688,
      "completions/mean_terminated_length": 557.7060546875,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 2.2239067055393584,
      "grad_norm": 0.14693720638751984,
      "learning_rate": 1e-06,
      "loss": 0.0073,
      "num_tokens": 138994007.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.1749846339225769,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 238
    },
    {
      "clip_ratio/high_max": 0.0017540780572744552,
      "clip_ratio/high_mean": 0.0006414892068278277,
      "clip_ratio/low_mean": 0.0005285826046019793,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011700718314386904,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3847.0,
      "completions/mean_length": 619.7767944335938,
      "completions/mean_terminated_length": 528.1924438476562,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 2.2332361516034984,
      "grad_norm": 0.1568622887134552,
      "learning_rate": 1e-06,
      "loss": 0.0138,
      "num_tokens": 139546311.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.1755894124507904,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 239
    },
    {
      "clip_ratio/high_max": 0.0016234175564022735,
      "clip_ratio/high_mean": 0.0005522355531866197,
      "clip_ratio/low_mean": 0.0004864165694016265,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001038652117131278,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2066.0,
      "completions/mean_length": 685.591552734375,
      "completions/mean_terminated_length": 538.69384765625,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 2.2425655976676384,
      "grad_norm": 0.14180706441402435,
      "learning_rate": 1e-06,
      "loss": -0.0037,
      "num_tokens": 140084193.0,
      "reward": 0.676339328289032,
      "reward_std": 0.15041227638721466,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335985660553,
      "step": 240
    },
    {
      "clip_ratio/high_max": 0.0020767255045939237,
      "clip_ratio/high_mean": 0.0006557366978086066,
      "clip_ratio/low_mean": 0.0005495269324455876,
      "clip_ratio/low_min": 8.420910489803646e-06,
      "clip_ratio/region_mean": 0.0012052636375301518,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2247.0,
      "completions/mean_length": 629.234375,
      "completions/mean_terminated_length": 554.127685546875,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 2.2518950437317784,
      "grad_norm": 0.14197291433811188,
      "learning_rate": 1e-06,
      "loss": 0.0024,
      "num_tokens": 140658643.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.16075009107589722,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 241
    },
    {
      "clip_ratio/high_max": 0.002139833912224276,
      "clip_ratio/high_mean": 0.0006846389969723532,
      "clip_ratio/low_mean": 0.000460942414974852,
      "clip_ratio/low_min": 1.6104097085190006e-05,
      "clip_ratio/region_mean": 0.0011455814164946787,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3501.0,
      "completions/mean_length": 638.7433471679688,
      "completions/mean_terminated_length": 563.8426513671875,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 2.2612244897959184,
      "grad_norm": 0.14257590472698212,
      "learning_rate": 1e-06,
      "loss": -0.0106,
      "num_tokens": 141233101.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.1579635888338089,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 242
    },
    {
      "clip_ratio/high_max": 0.001527231670479523,
      "clip_ratio/high_mean": 0.0005186072030483047,
      "clip_ratio/low_mean": 0.00044814040757046314,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000966747640632093,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2871.0,
      "completions/mean_length": 660.1730346679688,
      "completions/mean_terminated_length": 549.33984375,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 2.2705539358600584,
      "grad_norm": 0.12827670574188232,
      "learning_rate": 1e-06,
      "loss": -0.0195,
      "num_tokens": 141795584.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.1409432291984558,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 243
    },
    {
      "clip_ratio/high_max": 0.0016318869857059326,
      "clip_ratio/high_mean": 0.0005328227020982013,
      "clip_ratio/low_mean": 0.0005002548914490035,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010330776203772984,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2599.0,
      "completions/mean_length": 575.9029541015625,
      "completions/mean_terminated_length": 511.901123046875,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 2.2798833819241984,
      "grad_norm": 0.1491820514202118,
      "learning_rate": 1e-06,
      "loss": -0.0053,
      "num_tokens": 142323121.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.12854453921318054,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 244
    },
    {
      "clip_ratio/high_max": 0.001400911838572938,
      "clip_ratio/high_mean": 0.0005099018467262795,
      "clip_ratio/low_mean": 0.0005332937489583855,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010431956179672852,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3987.0,
      "completions/mean_length": 699.450927734375,
      "completions/mean_terminated_length": 569.5712280273438,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 2.2892128279883384,
      "grad_norm": 0.14287753403186798,
      "learning_rate": 1e-06,
      "loss": 0.0051,
      "num_tokens": 142897917.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.1634860783815384,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.49075525999069214,
      "step": 245
    },
    {
      "clip_ratio/high_max": 0.0018064162686641794,
      "clip_ratio/high_mean": 0.0006929526716703549,
      "clip_ratio/low_mean": 0.000614358965322026,
      "clip_ratio/low_min": 1.6297262845910154e-05,
      "clip_ratio/region_mean": 0.0013073116570012644,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2610.0,
      "completions/mean_length": 651.6607666015625,
      "completions/mean_terminated_length": 564.9610595703125,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 2.298542274052478,
      "grad_norm": 0.16102290153503418,
      "learning_rate": 1e-06,
      "loss": -0.0026,
      "num_tokens": 143484685.0,
      "reward": 0.5703125,
      "reward_std": 0.1980997622013092,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 246
    },
    {
      "clip_ratio/high_max": 0.0016381983587052673,
      "clip_ratio/high_mean": 0.0006063256041670684,
      "clip_ratio/low_mean": 0.00041601886732678395,
      "clip_ratio/low_min": 1.1895698662556242e-05,
      "clip_ratio/region_mean": 0.001022344469674863,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2455.0,
      "completions/mean_length": 636.1361694335938,
      "completions/mean_terminated_length": 561.1790161132812,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 2.307871720116618,
      "grad_norm": 0.1496383249759674,
      "learning_rate": 1e-06,
      "loss": 0.0083,
      "num_tokens": 144072527.0,
      "reward": 0.590401828289032,
      "reward_std": 0.1496659368276596,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 247
    },
    {
      "clip_ratio/high_max": 0.0021030395073466934,
      "clip_ratio/high_mean": 0.0008465516311844112,
      "clip_ratio/low_mean": 0.0005066342091595288,
      "clip_ratio/low_min": 1.3504753951565363e-05,
      "clip_ratio/region_mean": 0.0013531858385249507,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3977.0,
      "completions/mean_length": 647.4330444335938,
      "completions/mean_terminated_length": 536.18896484375,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 2.317201166180758,
      "grad_norm": 0.1665707528591156,
      "learning_rate": 1e-06,
      "loss": -0.004,
      "num_tokens": 144628939.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.21658296883106232,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 248
    },
    {
      "clip_ratio/high_max": 0.0015528605035797227,
      "clip_ratio/high_mean": 0.0006142230213299626,
      "clip_ratio/low_mean": 0.00034504018594816444,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009592631977284327,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3226.0,
      "completions/mean_length": 700.0189819335938,
      "completions/mean_terminated_length": 582.3753051757812,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 2.326530612244898,
      "grad_norm": 0.12995047867298126,
      "learning_rate": 1e-06,
      "loss": 0.0058,
      "num_tokens": 145223932.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.14515261352062225,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 249
    },
    {
      "clip_ratio/high_max": 0.0015787725133122876,
      "clip_ratio/high_mean": 0.000603437587415101,
      "clip_ratio/low_mean": 0.00041420321167606744,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010176408359257039,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3889.0,
      "completions/mean_length": 654.3638916015625,
      "completions/mean_terminated_length": 543.3433227539062,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 2.335860058309038,
      "grad_norm": 0.14374585449695587,
      "learning_rate": 1e-06,
      "loss": -0.0037,
      "num_tokens": 145778618.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.16070660948753357,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 250
    },
    {
      "clip_ratio/high_max": 0.0019336156474309973,
      "clip_ratio/high_mean": 0.0006991442587604979,
      "clip_ratio/low_mean": 0.0005083418527647154,
      "clip_ratio/low_min": 1.1222840839764103e-05,
      "clip_ratio/region_mean": 0.0012074861042492557,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2679.0,
      "completions/mean_length": 659.286865234375,
      "completions/mean_terminated_length": 527.871337890625,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 2.345189504373178,
      "grad_norm": 0.15194326639175415,
      "learning_rate": 1e-06,
      "loss": -0.0078,
      "num_tokens": 146326011.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.16642414033412933,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 251
    },
    {
      "clip_ratio/high_max": 0.001801699421775993,
      "clip_ratio/high_mean": 0.0007357687318290118,
      "clip_ratio/low_mean": 0.0006126707685325528,
      "clip_ratio/low_min": 3.877171184285544e-05,
      "clip_ratio/region_mean": 0.001348439502180554,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2570.0,
      "completions/mean_length": 658.0870971679688,
      "completions/mean_terminated_length": 522.4849243164062,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 2.354518950437318,
      "grad_norm": 0.1665963977575302,
      "learning_rate": 1e-06,
      "loss": 0.0089,
      "num_tokens": 146861721.0,
      "reward": 0.590401828289032,
      "reward_std": 0.1912222057580948,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 252
    },
    {
      "clip_ratio/high_max": 0.0017145752426586114,
      "clip_ratio/high_mean": 0.0006349218183459016,
      "clip_ratio/low_mean": 0.00037618277292494895,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010111046212841757,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2928.0,
      "completions/mean_length": 653.325927734375,
      "completions/mean_terminated_length": 550.44140625,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 2.363848396501458,
      "grad_norm": 0.12984620034694672,
      "learning_rate": 1e-06,
      "loss": -0.0086,
      "num_tokens": 147425741.0,
      "reward": 0.6595982313156128,
      "reward_std": 0.13940228521823883,
      "rewards/verify_math_reward/mean": 0.6595982313156128,
      "rewards/verify_math_reward/std": 0.4741089344024658,
      "step": 253
    },
    {
      "clip_ratio/high_max": 0.0015443033153133001,
      "clip_ratio/high_mean": 0.000532831417331181,
      "clip_ratio/low_mean": 0.0006252439561649226,
      "clip_ratio/low_min": 6.449988632084569e-05,
      "clip_ratio/region_mean": 0.0011580753744055983,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3603.0,
      "completions/mean_length": 720.2455444335938,
      "completions/mean_terminated_length": 603.3025512695312,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 2.373177842565598,
      "grad_norm": 0.14159341156482697,
      "learning_rate": 1e-06,
      "loss": 0.002,
      "num_tokens": 148030657.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.16724716126918793,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 254
    },
    {
      "clip_ratio/high_max": 0.0019088175176875666,
      "clip_ratio/high_mean": 0.0007623265610163799,
      "clip_ratio/low_mean": 0.00039129305423557526,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011536196179804392,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2227.0,
      "completions/mean_length": 616.1551513671875,
      "completions/mean_terminated_length": 532.6388549804688,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 2.3825072886297374,
      "grad_norm": 0.14764274656772614,
      "learning_rate": 1e-06,
      "loss": -0.016,
      "num_tokens": 148581748.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.17810744047164917,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219157218933105,
      "step": 255
    },
    {
      "clip_ratio/high_max": 0.0015490252699237317,
      "clip_ratio/high_mean": 0.0005637399626721162,
      "clip_ratio/low_mean": 0.0003616811254687491,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00092542108905036,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3699.0,
      "completions/mean_length": 648.4553833007812,
      "completions/mean_terminated_length": 593.732421875,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 2.3918367346938774,
      "grad_norm": 0.13943234086036682,
      "learning_rate": 1e-06,
      "loss": 0.0078,
      "num_tokens": 149199116.0,
      "reward": 0.578125,
      "reward_std": 0.14083515107631683,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 256
    },
    {
      "clip_ratio/high_max": 0.00218620785017265,
      "clip_ratio/high_mean": 0.0009041907687787898,
      "clip_ratio/low_mean": 0.0005129284945724066,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014171192960930057,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3906.0,
      "completions/mean_length": 665.1942138671875,
      "completions/mean_terminated_length": 582.8548583984375,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 2.4011661807580174,
      "grad_norm": 0.14255432784557343,
      "learning_rate": 1e-06,
      "loss": 0.0014,
      "num_tokens": 149797626.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.1925688236951828,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.4884119927883148,
      "step": 257
    },
    {
      "clip_ratio/high_max": 0.001797141281713266,
      "clip_ratio/high_mean": 0.0006807713007219718,
      "clip_ratio/low_mean": 0.0005956136137683643,
      "clip_ratio/low_min": 1.3926024621468969e-05,
      "clip_ratio/region_mean": 0.0012763849335897248,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2351.0,
      "completions/mean_length": 608.4285888671875,
      "completions/mean_terminated_length": 536.9293823242188,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 2.4104956268221573,
      "grad_norm": 0.1662139594554901,
      "learning_rate": 1e-06,
      "loss": 0.0027,
      "num_tokens": 150353954.0,
      "reward": 0.645089328289032,
      "reward_std": 0.19801212847232819,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 258
    },
    {
      "clip_ratio/high_max": 0.002128478605300188,
      "clip_ratio/high_mean": 0.0008221288917411584,
      "clip_ratio/low_mean": 0.0004552070704448852,
      "clip_ratio/low_min": 1.4902241673553362e-05,
      "clip_ratio/region_mean": 0.0012773359740094747,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3439.0,
      "completions/mean_length": 602.7600708007812,
      "completions/mean_terminated_length": 543.2838134765625,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 2.4198250728862973,
      "grad_norm": 0.15467597544193268,
      "learning_rate": 1e-06,
      "loss": -0.007,
      "num_tokens": 150924123.0,
      "reward": 0.660714328289032,
      "reward_std": 0.16371431946754456,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 259
    },
    {
      "clip_ratio/high_max": 0.001650483456614893,
      "clip_ratio/high_mean": 0.0007707060449320124,
      "clip_ratio/low_mean": 0.0006107790904934518,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013814851445204113,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3470.0,
      "completions/mean_length": 683.4230346679688,
      "completions/mean_terminated_length": 561.12255859375,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 2.4291545189504373,
      "grad_norm": 0.1504625380039215,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 151501126.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.191038578748703,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 260
    },
    {
      "clip_ratio/high_max": 0.0017616295153857209,
      "clip_ratio/high_mean": 0.0006268397346502752,
      "clip_ratio/low_mean": 0.0005830239133501891,
      "clip_ratio/low_min": 3.093332452408504e-05,
      "clip_ratio/region_mean": 0.0012098636179871392,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2362.0,
      "completions/mean_length": 632.9017944335938,
      "completions/mean_terminated_length": 573.938720703125,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 2.4384839650145773,
      "grad_norm": 0.16305653750896454,
      "learning_rate": 1e-06,
      "loss": 0.0024,
      "num_tokens": 152102046.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.1796495020389557,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 261
    },
    {
      "clip_ratio/high_max": 0.001908959475258598,
      "clip_ratio/high_mean": 0.0007649866220162949,
      "clip_ratio/low_mean": 0.0006326360653474694,
      "clip_ratio/low_min": 1.3001872503082268e-05,
      "clip_ratio/region_mean": 0.001397622712829616,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2269.0,
      "completions/mean_length": 665.8582763671875,
      "completions/mean_terminated_length": 530.5626220703125,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 2.4478134110787173,
      "grad_norm": 0.15925399959087372,
      "learning_rate": 1e-06,
      "loss": -0.0146,
      "num_tokens": 152648231.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.1882181465625763,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 262
    },
    {
      "clip_ratio/high_max": 0.0016611664577794727,
      "clip_ratio/high_mean": 0.0005999356662869104,
      "clip_ratio/low_mean": 0.0005950688196207921,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011950044900004286,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3736.0,
      "completions/mean_length": 648.9933471679688,
      "completions/mean_terminated_length": 529.5819702148438,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 2.4571428571428573,
      "grad_norm": 0.12971743941307068,
      "learning_rate": 1e-06,
      "loss": -0.0062,
      "num_tokens": 153191657.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.1479288637638092,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 263
    },
    {
      "clip_ratio/high_max": 0.0017268379779125098,
      "clip_ratio/high_mean": 0.000678713744491688,
      "clip_ratio/low_mean": 0.0006471916422015056,
      "clip_ratio/low_min": 1.4633575119660236e-05,
      "clip_ratio/region_mean": 0.001325905406702077,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2479.0,
      "completions/mean_length": 647.1127319335938,
      "completions/mean_terminated_length": 539.9551391601562,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 2.466472303206997,
      "grad_norm": 0.1600545346736908,
      "learning_rate": 1e-06,
      "loss": -0.0099,
      "num_tokens": 153755110.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.20831559598445892,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 264
    },
    {
      "clip_ratio/high_max": 0.0013077658313704887,
      "clip_ratio/high_mean": 0.00045768982727167895,
      "clip_ratio/low_mean": 0.0002947282459899725,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007524180691689253,
      "completions/clipped_ratio": 0.0401785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4078.0,
      "completions/mean_length": 709.7835083007812,
      "completions/mean_terminated_length": 568.034912109375,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 2.4758017492711373,
      "grad_norm": 0.12772515416145325,
      "learning_rate": 1e-06,
      "loss": -0.0048,
      "num_tokens": 154332364.0,
      "reward": 0.5859375,
      "reward_std": 0.13023702800273895,
      "rewards/verify_math_reward/mean": 0.5859375,
      "rewards/verify_math_reward/std": 0.4928344786167145,
      "step": 265
    },
    {
      "clip_ratio/high_max": 0.0019189923150406685,
      "clip_ratio/high_mean": 0.0007898198637121823,
      "clip_ratio/low_mean": 0.000392467261008278,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011822871238109656,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2964.0,
      "completions/mean_length": 665.5223388671875,
      "completions/mean_terminated_length": 526.072021484375,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 2.485131195335277,
      "grad_norm": 0.16601820290088654,
      "learning_rate": 1e-06,
      "loss": -0.0055,
      "num_tokens": 154868656.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.19001504778862,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 266
    },
    {
      "clip_ratio/high_max": 0.0015905233594821766,
      "clip_ratio/high_mean": 0.0005608878418570384,
      "clip_ratio/low_mean": 0.0005168577772565186,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010777456154755782,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3940.0,
      "completions/mean_length": 636.3381958007812,
      "completions/mean_terminated_length": 549.2528076171875,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 2.494460641399417,
      "grad_norm": 0.13197234272956848,
      "learning_rate": 1e-06,
      "loss": -0.0034,
      "num_tokens": 155434911.0,
      "reward": 0.621651828289032,
      "reward_std": 0.13177543878555298,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.485245943069458,
      "step": 267
    },
    {
      "clip_ratio/high_max": 0.0016901352646527812,
      "clip_ratio/high_mean": 0.0006072937467251904,
      "clip_ratio/low_mean": 0.00041761771080928156,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010249114711768925,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4068.0,
      "completions/mean_length": 755.8850708007812,
      "completions/mean_terminated_length": 636.1815185546875,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 2.503790087463557,
      "grad_norm": 0.13727904856204987,
      "learning_rate": 1e-06,
      "loss": -0.007,
      "num_tokens": 156084184.0,
      "reward": 0.582589328289032,
      "reward_std": 0.1645801067352295,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.4934072494506836,
      "step": 268
    },
    {
      "clip_ratio/high_max": 0.002206893463153392,
      "clip_ratio/high_mean": 0.0008154273909894982,
      "clip_ratio/low_mean": 0.0005104676602059044,
      "clip_ratio/low_min": 6.633410976064624e-06,
      "clip_ratio/region_mean": 0.0013258950493764132,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3698.0,
      "completions/mean_length": 654.0714721679688,
      "completions/mean_terminated_length": 559.3394165039062,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 2.513119533527697,
      "grad_norm": 0.15356619656085968,
      "learning_rate": 1e-06,
      "loss": -0.018,
      "num_tokens": 156656768.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.1895604282617569,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 269
    },
    {
      "clip_ratio/high_max": 0.0017181057191919535,
      "clip_ratio/high_mean": 0.0005694239230251696,
      "clip_ratio/low_mean": 0.0004686009879151243,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010380249223089777,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2846.0,
      "completions/mean_length": 631.4765625,
      "completions/mean_terminated_length": 552.3778076171875,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 2.522448979591837,
      "grad_norm": 0.14008112251758575,
      "learning_rate": 1e-06,
      "loss": -0.01,
      "num_tokens": 157234427.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.1346297711133957,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.48841196298599243,
      "step": 270
    },
    {
      "clip_ratio/high_max": 0.0020013369012303883,
      "clip_ratio/high_mean": 0.0006366890547724324,
      "clip_ratio/low_mean": 0.0005819433490614756,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001218632394738961,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3925.0,
      "completions/mean_length": 705.5301513671875,
      "completions/mean_terminated_length": 616.205078125,
      "completions/min_length": 192.0,
      "completions/min_terminated_length": 192.0,
      "epoch": 2.5317784256559768,
      "grad_norm": 0.143727645277977,
      "learning_rate": 1e-06,
      "loss": 0.0047,
      "num_tokens": 157883374.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.16676117479801178,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 271
    },
    {
      "clip_ratio/high_max": 0.0019367850800335873,
      "clip_ratio/high_mean": 0.0007181636847235495,
      "clip_ratio/low_mean": 0.0006665767095910269,
      "clip_ratio/low_min": 5.396064170781756e-05,
      "clip_ratio/region_mean": 0.0013847403970430605,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1988.0,
      "completions/mean_length": 635.3538208007812,
      "completions/mean_terminated_length": 548.24365234375,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 2.5411078717201168,
      "grad_norm": 0.16358663141727448,
      "learning_rate": 1e-06,
      "loss": 0.0077,
      "num_tokens": 158457627.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.1747995913028717,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.4884119927883148,
      "step": 272
    },
    {
      "clip_ratio/high_max": 0.0021312850803951733,
      "clip_ratio/high_mean": 0.0008729010842216667,
      "clip_ratio/low_mean": 0.0006221409239515197,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014950420190871228,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2360.0,
      "completions/mean_length": 659.3917846679688,
      "completions/mean_terminated_length": 564.80615234375,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 2.5504373177842563,
      "grad_norm": 0.15482480823993683,
      "learning_rate": 1e-06,
      "loss": -0.0006,
      "num_tokens": 159032274.0,
      "reward": 0.629464328289032,
      "reward_std": 0.20749185979366302,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 273
    },
    {
      "clip_ratio/high_max": 0.0017957189520529937,
      "clip_ratio/high_mean": 0.0006052036906112335,
      "clip_ratio/low_mean": 0.0005452909608720802,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011504946523928083,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3438.0,
      "completions/mean_length": 800.5938110351562,
      "completions/mean_terminated_length": 634.4712524414062,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 2.5597667638483967,
      "grad_norm": 0.13584306836128235,
      "learning_rate": 1e-06,
      "loss": -0.0064,
      "num_tokens": 159658286.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.17423324286937714,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796950817108154,
      "step": 274
    },
    {
      "clip_ratio/high_max": 0.0019833818441838957,
      "clip_ratio/high_mean": 0.0008121807804855052,
      "clip_ratio/low_mean": 0.0007322955134441145,
      "clip_ratio/low_min": 4.051339146826649e-05,
      "clip_ratio/region_mean": 0.0015444763157574926,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2553.0,
      "completions/mean_length": 659.607177734375,
      "completions/mean_terminated_length": 573.1075439453125,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 2.5690962099125363,
      "grad_norm": 0.16442091763019562,
      "learning_rate": 1e-06,
      "loss": 0.0062,
      "num_tokens": 160246262.0,
      "reward": 0.5770089626312256,
      "reward_std": 0.1950957179069519,
      "rewards/verify_math_reward/mean": 0.5770089030265808,
      "rewards/verify_math_reward/std": 0.4943099617958069,
      "step": 275
    },
    {
      "clip_ratio/high_max": 0.0020285667706048116,
      "clip_ratio/high_mean": 0.0008122730541799683,
      "clip_ratio/low_mean": 0.0005162023753655376,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013284754131746013,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3548.0,
      "completions/mean_length": 677.9207763671875,
      "completions/mean_terminated_length": 587.8682861328125,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 2.5784256559766763,
      "grad_norm": 0.16298985481262207,
      "learning_rate": 1e-06,
      "loss": 0.0005,
      "num_tokens": 160856743.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.18223848938941956,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580071330070496,
      "step": 276
    },
    {
      "clip_ratio/high_max": 0.0012773446906066965,
      "clip_ratio/high_mean": 0.00046138433572195936,
      "clip_ratio/low_mean": 0.0005144769911566982,
      "clip_ratio/low_min": 1.004338719212683e-05,
      "clip_ratio/region_mean": 0.0009758613596204668,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4020.0,
      "completions/mean_length": 618.3527221679688,
      "completions/mean_terminated_length": 559.1419067382812,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 2.5877551020408163,
      "grad_norm": 0.1506633758544922,
      "learning_rate": 1e-06,
      "loss": 0.0107,
      "num_tokens": 161436283.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.15323200821876526,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 277
    },
    {
      "clip_ratio/high_max": 0.0016585250850766897,
      "clip_ratio/high_mean": 0.0006474419169535395,
      "clip_ratio/low_mean": 0.000588792541748262,
      "clip_ratio/low_min": 2.9994330361660104e-05,
      "clip_ratio/region_mean": 0.0012362344841676531,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3539.0,
      "completions/mean_length": 696.2098388671875,
      "completions/mean_terminated_length": 578.4342041015625,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 2.5970845481049563,
      "grad_norm": 0.1527535766363144,
      "learning_rate": 1e-06,
      "loss": -0.0088,
      "num_tokens": 162029799.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.18652454018592834,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 278
    },
    {
      "clip_ratio/high_max": 0.001710698736133054,
      "clip_ratio/high_mean": 0.0007456493276549736,
      "clip_ratio/low_mean": 0.0005641687066599843,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013098180315864738,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1964.0,
      "completions/mean_length": 624.9765625,
      "completions/mean_terminated_length": 525.3489990234375,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 2.6064139941690962,
      "grad_norm": 0.15717408061027527,
      "learning_rate": 1e-06,
      "loss": -0.0128,
      "num_tokens": 162574402.0,
      "reward": 0.640625,
      "reward_std": 0.20094947516918182,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 279
    },
    {
      "clip_ratio/high_max": 0.0016847828483150806,
      "clip_ratio/high_mean": 0.0006836661832494428,
      "clip_ratio/low_mean": 0.0005904825584366336,
      "clip_ratio/low_min": 1.7561113054398447e-05,
      "clip_ratio/region_mean": 0.0012741487589664757,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3214.0,
      "completions/mean_length": 662.0748291015625,
      "completions/mean_terminated_length": 543.1166381835938,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 2.6157434402332362,
      "grad_norm": 0.15844663977622986,
      "learning_rate": 1e-06,
      "loss": -0.0254,
      "num_tokens": 163129125.0,
      "reward": 0.59375,
      "reward_std": 0.1752542406320572,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 280
    },
    {
      "clip_ratio/high_max": 0.0019050513583351858,
      "clip_ratio/high_mean": 0.0008218782295443816,
      "clip_ratio/low_mean": 0.0004894271578450571,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013113054010318592,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3920.0,
      "completions/mean_length": 605.3381958007812,
      "completions/mean_terminated_length": 513.3734741210938,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 2.6250728862973762,
      "grad_norm": 0.1863740086555481,
      "learning_rate": 1e-06,
      "loss": 0.0071,
      "num_tokens": 163658220.0,
      "reward": 0.6930803656578064,
      "reward_std": 0.1828383058309555,
      "rewards/verify_math_reward/mean": 0.6930803656578064,
      "rewards/verify_math_reward/std": 0.46147334575653076,
      "step": 281
    },
    {
      "clip_ratio/high_max": 0.0019721330536413006,
      "clip_ratio/high_mean": 0.000803927548986394,
      "clip_ratio/low_mean": 0.0003882240598613862,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001192151612485759,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3741.0,
      "completions/mean_length": 642.8482666015625,
      "completions/mean_terminated_length": 555.9267578125,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 2.6344023323615158,
      "grad_norm": 0.14842215180397034,
      "learning_rate": 1e-06,
      "loss": -0.0119,
      "num_tokens": 164228764.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.16296431422233582,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331799030303955,
      "step": 282
    },
    {
      "clip_ratio/high_max": 0.0017780308335204609,
      "clip_ratio/high_mean": 0.0006858137767267181,
      "clip_ratio/low_mean": 0.00055878469174786,
      "clip_ratio/low_min": 8.270477337646298e-06,
      "clip_ratio/region_mean": 0.0012445984611986205,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3752.0,
      "completions/mean_length": 672.7176513671875,
      "completions/mean_terminated_length": 545.9293823242188,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 2.643731778425656,
      "grad_norm": 0.15470071136951447,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 164780959.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.17442938685417175,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 283
    },
    {
      "clip_ratio/high_max": 0.001965646730241133,
      "clip_ratio/high_mean": 0.0007492106942663668,
      "clip_ratio/low_mean": 0.0004284022634237772,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011776129540521652,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3432.0,
      "completions/mean_length": 590.1495971679688,
      "completions/mean_terminated_length": 526.4067993164062,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 2.6530612244897958,
      "grad_norm": 0.16078557074069977,
      "learning_rate": 1e-06,
      "loss": -0.0044,
      "num_tokens": 165339453.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.1886282116174698,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.46642565727233887,
      "step": 284
    },
    {
      "clip_ratio/high_max": 0.0014745079060958233,
      "clip_ratio/high_mean": 0.0005221892333793221,
      "clip_ratio/low_mean": 0.0005899140305700712,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011121032730443403,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3898.0,
      "completions/mean_length": 670.4542846679688,
      "completions/mean_terminated_length": 576.1731567382812,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 2.6623906705539357,
      "grad_norm": 0.14048659801483154,
      "learning_rate": 1e-06,
      "loss": -0.0048,
      "num_tokens": 165931444.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.15379653871059418,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321892857551575,
      "step": 285
    },
    {
      "clip_ratio/high_max": 0.00173973134951666,
      "clip_ratio/high_mean": 0.0006114605039329035,
      "clip_ratio/low_mean": 0.00041383907682757126,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010252995743940119,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4040.0,
      "completions/mean_length": 787.6707763671875,
      "completions/mean_terminated_length": 645.1699829101562,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 2.6717201166180757,
      "grad_norm": 0.12932142615318298,
      "learning_rate": 1e-06,
      "loss": -0.0104,
      "num_tokens": 166574605.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.1613806188106537,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 286
    },
    {
      "clip_ratio/high_max": 0.0017241667810594663,
      "clip_ratio/high_mean": 0.0006313373833108926,
      "clip_ratio/low_mean": 0.00058198638180329,
      "clip_ratio/low_min": 3.329036189825274e-05,
      "clip_ratio/region_mean": 0.001213323776028119,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3684.0,
      "completions/mean_length": 738.1261596679688,
      "completions/mean_terminated_length": 589.4091186523438,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 2.6810495626822157,
      "grad_norm": 0.166813924908638,
      "learning_rate": 1e-06,
      "loss": -0.0005,
      "num_tokens": 167174934.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.1808943897485733,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 287
    },
    {
      "clip_ratio/high_max": 0.0014300073598860763,
      "clip_ratio/high_mean": 0.0005297455390973482,
      "clip_ratio/low_mean": 0.00047350670047308085,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010032522441179026,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3678.0,
      "completions/mean_length": 697.0670166015625,
      "completions/mean_terminated_length": 599.5086059570312,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 2.6903790087463557,
      "grad_norm": 0.14425304532051086,
      "learning_rate": 1e-06,
      "loss": 0.0048,
      "num_tokens": 167781954.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.16506868600845337,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 288
    },
    {
      "clip_ratio/high_max": 0.0015350020094047068,
      "clip_ratio/high_mean": 0.0006349265963763173,
      "clip_ratio/low_mean": 0.0006083126745579648,
      "clip_ratio/low_min": 1.2398333637975156e-05,
      "clip_ratio/region_mean": 0.0012432392904884182,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4063.0,
      "completions/mean_length": 700.9699096679688,
      "completions/mean_terminated_length": 579.2982788085938,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 2.6997084548104957,
      "grad_norm": 0.16011632978916168,
      "learning_rate": 1e-06,
      "loss": -0.0011,
      "num_tokens": 168367335.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.19779597222805023,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689781665802,
      "step": 289
    },
    {
      "clip_ratio/high_max": 0.0021332379037630744,
      "clip_ratio/high_mean": 0.0007598556130687939,
      "clip_ratio/low_mean": 0.000539392859536747,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012992484953429084,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4015.0,
      "completions/mean_length": 646.318115234375,
      "completions/mean_terminated_length": 555.4330444335938,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 2.7090379008746357,
      "grad_norm": 0.15972739458084106,
      "learning_rate": 1e-06,
      "loss": -0.004,
      "num_tokens": 168939676.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.1827288419008255,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 290
    },
    {
      "clip_ratio/high_max": 0.0018446759895596188,
      "clip_ratio/high_mean": 0.0005988040938973427,
      "clip_ratio/low_mean": 0.0005522559067685506,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011510599761095364,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3810.0,
      "completions/mean_length": 629.6127319335938,
      "completions/mean_terminated_length": 570.5936889648438,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 2.7183673469387752,
      "grad_norm": 0.1624128669500351,
      "learning_rate": 1e-06,
      "loss": -0.0033,
      "num_tokens": 169532665.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.16532830893993378,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 291
    },
    {
      "clip_ratio/high_max": 0.0020158386978437193,
      "clip_ratio/high_mean": 0.000681201014231192,
      "clip_ratio/low_mean": 0.0006710060661134776,
      "clip_ratio/low_min": 1.8312335669179447e-05,
      "clip_ratio/region_mean": 0.001352207091258606,
      "completions/clipped_ratio": 0.0502232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3901.0,
      "completions/mean_length": 773.9598388671875,
      "completions/mean_terminated_length": 598.2937622070312,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 2.7276967930029157,
      "grad_norm": 0.16120792925357819,
      "learning_rate": 1e-06,
      "loss": -0.0105,
      "num_tokens": 170144181.0,
      "reward": 0.5078125,
      "reward_std": 0.19756634533405304,
      "rewards/verify_math_reward/mean": 0.5078125,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 292
    },
    {
      "clip_ratio/high_max": 0.0020143109359196387,
      "clip_ratio/high_mean": 0.0007559222949566902,
      "clip_ratio/low_mean": 0.0005012179080949863,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00125714020759915,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3833.0,
      "completions/mean_length": 780.6239013671875,
      "completions/mean_terminated_length": 633.7890625,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 2.7370262390670552,
      "grad_norm": 0.16876362264156342,
      "learning_rate": 1e-06,
      "loss": -0.0193,
      "num_tokens": 170778932.0,
      "reward": 0.590401828289032,
      "reward_std": 0.18840500712394714,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 293
    },
    {
      "clip_ratio/high_max": 0.0017424338420823915,
      "clip_ratio/high_mean": 0.0006393464655047865,
      "clip_ratio/low_mean": 0.0004578434029554046,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010971898809657432,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3937.0,
      "completions/mean_length": 700.6261596679688,
      "completions/mean_terminated_length": 562.602783203125,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 2.746355685131195,
      "grad_norm": 0.1508655995130539,
      "learning_rate": 1e-06,
      "loss": 0.0003,
      "num_tokens": 171364165.0,
      "reward": 0.5703125,
      "reward_std": 0.15187835693359375,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 294
    },
    {
      "clip_ratio/high_max": 0.0016017416528484318,
      "clip_ratio/high_mean": 0.0006121256719779922,
      "clip_ratio/low_mean": 0.0004792724794242531,
      "clip_ratio/low_min": 1.2361550943751354e-05,
      "clip_ratio/region_mean": 0.0010913981568592135,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3830.0,
      "completions/mean_length": 727.005615234375,
      "completions/mean_terminated_length": 569.575927734375,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 2.755685131195335,
      "grad_norm": 0.15585237741470337,
      "learning_rate": 1e-06,
      "loss": 0.0025,
      "num_tokens": 171937642.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.16476628184318542,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 295
    },
    {
      "clip_ratio/high_max": 0.0018939640758617315,
      "clip_ratio/high_mean": 0.0007643671979167266,
      "clip_ratio/low_mean": 0.0005092514757052413,
      "clip_ratio/low_min": 1.817917473090347e-05,
      "clip_ratio/region_mean": 0.001273618639970664,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3962.0,
      "completions/mean_length": 675.0078125,
      "completions/mean_terminated_length": 576.8162841796875,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 2.765014577259475,
      "grad_norm": 0.1620856523513794,
      "learning_rate": 1e-06,
      "loss": -0.0026,
      "num_tokens": 172528713.0,
      "reward": 0.629464328289032,
      "reward_std": 0.17078480124473572,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 296
    },
    {
      "clip_ratio/high_max": 0.0016967893825494684,
      "clip_ratio/high_mean": 0.0006355356690619374,
      "clip_ratio/low_mean": 0.0006271112733884365,
      "clip_ratio/low_min": 3.977091910201125e-05,
      "clip_ratio/region_mean": 0.0012626469506358262,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4054.0,
      "completions/mean_length": 664.739990234375,
      "completions/mean_terminated_length": 570.3015747070312,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 2.774344023323615,
      "grad_norm": 0.2294016033411026,
      "learning_rate": 1e-06,
      "loss": 0.0055,
      "num_tokens": 173114304.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.1891920119524002,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4982811510562897,
      "step": 297
    },
    {
      "clip_ratio/high_max": 0.0016081339344964363,
      "clip_ratio/high_mean": 0.0005451015913422452,
      "clip_ratio/low_mean": 0.000597168831518502,
      "clip_ratio/low_min": 1.990445889532566e-05,
      "clip_ratio/region_mean": 0.0011422704192227684,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3131.0,
      "completions/mean_length": 673.5926513671875,
      "completions/mean_terminated_length": 575.3604736328125,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 2.783673469387755,
      "grad_norm": 0.1499912440776825,
      "learning_rate": 1e-06,
      "loss": 0.0057,
      "num_tokens": 173707715.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.16427773237228394,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 298
    },
    {
      "clip_ratio/high_max": 0.0017524625054647913,
      "clip_ratio/high_mean": 0.0006328346871669055,
      "clip_ratio/low_mean": 0.0006639741823164513,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012968088776688091,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3969.0,
      "completions/mean_length": 682.2890625,
      "completions/mean_terminated_length": 559.9479370117188,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 2.793002915451895,
      "grad_norm": 0.1889982670545578,
      "learning_rate": 1e-06,
      "loss": -0.0053,
      "num_tokens": 174279902.0,
      "reward": 0.5703125,
      "reward_std": 0.18201345205307007,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 299
    },
    {
      "clip_ratio/high_max": 0.0019045324734179303,
      "clip_ratio/high_mean": 0.0007080908726493362,
      "clip_ratio/low_mean": 0.0005070006645837566,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012150915536039975,
      "completions/clipped_ratio": 0.0435267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4048.0,
      "completions/mean_length": 763.2980346679688,
      "completions/mean_terminated_length": 611.634765625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 2.8023323615160347,
      "grad_norm": 0.1669740378856659,
      "learning_rate": 1e-06,
      "loss": 0.0093,
      "num_tokens": 174888033.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.176642507314682,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 300
    },
    {
      "clip_ratio/high_max": 0.001616864505194826,
      "clip_ratio/high_mean": 0.0006531047092721565,
      "clip_ratio/low_mean": 0.0003469724724709522,
      "clip_ratio/low_min": 2.8788466806872748e-05,
      "clip_ratio/region_mean": 0.0010000771762861405,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2476.0,
      "completions/mean_length": 690.1451416015625,
      "completions/mean_terminated_length": 543.4435424804688,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 2.811661807580175,
      "grad_norm": 0.14893873035907745,
      "learning_rate": 1e-06,
      "loss": -0.0137,
      "num_tokens": 175435499.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.15582673251628876,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 301
    },
    {
      "clip_ratio/high_max": 0.0017115616665250855,
      "clip_ratio/high_mean": 0.0006654281801274919,
      "clip_ratio/low_mean": 0.0005392684470280074,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001204696638524183,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4048.0,
      "completions/mean_length": 718.4710083007812,
      "completions/mean_terminated_length": 589.3186645507812,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 2.8209912536443147,
      "grad_norm": 0.17098160088062286,
      "learning_rate": 1e-06,
      "loss": -0.0095,
      "num_tokens": 176027769.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.17430992424488068,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 302
    },
    {
      "clip_ratio/high_max": 0.0019442137636360712,
      "clip_ratio/high_mean": 0.0007267302898981143,
      "clip_ratio/low_mean": 0.0005410526910054614,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012677829945459962,
      "completions/clipped_ratio": 0.0401785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3773.0,
      "completions/mean_length": 777.5413208007812,
      "completions/mean_terminated_length": 638.6290893554688,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 2.8303206997084547,
      "grad_norm": 0.16408035159111023,
      "learning_rate": 1e-06,
      "loss": 0.0028,
      "num_tokens": 176666854.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.18870487809181213,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 303
    },
    {
      "clip_ratio/high_max": 0.00199945454733097,
      "clip_ratio/high_mean": 0.0007962360687088221,
      "clip_ratio/low_mean": 0.0006074344128137454,
      "clip_ratio/low_min": 3.534127972670831e-05,
      "clip_ratio/region_mean": 0.0014036704669706523,
      "completions/clipped_ratio": 0.0323660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3009.0,
      "completions/mean_length": 670.0223388671875,
      "completions/mean_terminated_length": 555.4279174804688,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 2.8396501457725947,
      "grad_norm": 5.749591827392578,
      "learning_rate": 1e-06,
      "loss": -0.007,
      "num_tokens": 177239434.0,
      "reward": 0.6328125,
      "reward_std": 0.18847663700580597,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 304
    },
    {
      "clip_ratio/high_max": 0.0016806647399789654,
      "clip_ratio/high_mean": 0.0006731945659339544,
      "clip_ratio/low_mean": 0.0004962061566402554,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011694007080222946,
      "completions/clipped_ratio": 0.046875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3892.0,
      "completions/mean_length": 741.9832763671875,
      "completions/mean_terminated_length": 577.0316162109375,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 2.8489795918367347,
      "grad_norm": 0.2282479852437973,
      "learning_rate": 1e-06,
      "loss": 0.0125,
      "num_tokens": 177831483.0,
      "reward": 0.5859375,
      "reward_std": 0.17882534861564636,
      "rewards/verify_math_reward/mean": 0.5859375,
      "rewards/verify_math_reward/std": 0.4928344786167145,
      "step": 305
    },
    {
      "clip_ratio/high_max": 0.001882583059341414,
      "clip_ratio/high_mean": 0.0007573571419925429,
      "clip_ratio/low_mean": 0.0005209851324252668,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012783422935171984,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1929.0,
      "completions/mean_length": 687.8471069335938,
      "completions/mean_terminated_length": 541.0465698242188,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 2.8583090379008746,
      "grad_norm": 0.17052477598190308,
      "learning_rate": 1e-06,
      "loss": -0.0111,
      "num_tokens": 178379378.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.17746736109256744,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 306
    },
    {
      "clip_ratio/high_max": 0.0017728673519741278,
      "clip_ratio/high_mean": 0.0006252272169149364,
      "clip_ratio/low_mean": 0.0006276568656176096,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001252884067071136,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3863.0,
      "completions/mean_length": 691.1953735351562,
      "completions/mean_terminated_length": 561.0,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 2.8676384839650146,
      "grad_norm": 0.16564425826072693,
      "learning_rate": 1e-06,
      "loss": -0.0029,
      "num_tokens": 178947081.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.16330061852931976,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 307
    },
    {
      "clip_ratio/high_max": 0.001929381880472647,
      "clip_ratio/high_mean": 0.0007164663511503022,
      "clip_ratio/low_mean": 0.0004968561765963386,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001213322513649473,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3813.0,
      "completions/mean_length": 705.3750610351562,
      "completions/mean_terminated_length": 555.2074584960938,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 2.8769679300291546,
      "grad_norm": 0.17670652270317078,
      "learning_rate": 1e-06,
      "loss": -0.0087,
      "num_tokens": 179510257.0,
      "reward": 0.613839328289032,
      "reward_std": 0.15293031930923462,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 308
    },
    {
      "clip_ratio/high_max": 0.0018446204776410013,
      "clip_ratio/high_mean": 0.0006679304315184709,
      "clip_ratio/low_mean": 0.0005754844419243454,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012434148538886802,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3943.0,
      "completions/mean_length": 654.8170166015625,
      "completions/mean_terminated_length": 547.8987426757812,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 2.8862973760932946,
      "grad_norm": 0.1854640245437622,
      "learning_rate": 1e-06,
      "loss": -0.0167,
      "num_tokens": 180073517.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.18201416730880737,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 309
    },
    {
      "clip_ratio/high_max": 0.0014908744633430615,
      "clip_ratio/high_mean": 0.0006075170895201154,
      "clip_ratio/low_mean": 0.0005944760378042702,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012019931309623644,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3694.0,
      "completions/mean_length": 691.9676513671875,
      "completions/mean_terminated_length": 602.2852783203125,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 2.8956268221574346,
      "grad_norm": 0.1460346132516861,
      "learning_rate": 1e-06,
      "loss": 0.0102,
      "num_tokens": 180684648.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.17697951197624207,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 310
    },
    {
      "clip_ratio/high_max": 0.0016144792025443166,
      "clip_ratio/high_mean": 0.0005706704141630325,
      "clip_ratio/low_mean": 0.00040935545212050783,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009800258740142453,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3947.0,
      "completions/mean_length": 660.5480346679688,
      "completions/mean_terminated_length": 541.5369262695312,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 2.904956268221574,
      "grad_norm": 0.14667034149169922,
      "learning_rate": 1e-06,
      "loss": -0.0159,
      "num_tokens": 181252803.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.14507704973220825,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 311
    },
    {
      "clip_ratio/high_max": 0.00180345787157421,
      "clip_ratio/high_mean": 0.0005535933951250627,
      "clip_ratio/low_mean": 0.000350333871665498,
      "clip_ratio/low_min": 1.0835645298357122e-05,
      "clip_ratio/region_mean": 0.0009039272681548027,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4082.0,
      "completions/mean_length": 665.2545166015625,
      "completions/mean_terminated_length": 566.7830200195312,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 2.914285714285714,
      "grad_norm": 0.14752334356307983,
      "learning_rate": 1e-06,
      "loss": 0.001,
      "num_tokens": 181836815.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.13388299942016602,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 312
    },
    {
      "clip_ratio/high_max": 0.0016972209341474809,
      "clip_ratio/high_mean": 0.0006843325045338133,
      "clip_ratio/low_mean": 0.00048033437133199186,
      "clip_ratio/low_min": 1.7740561816026457e-05,
      "clip_ratio/region_mean": 0.0011646668936009519,
      "completions/clipped_ratio": 0.0546875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4025.0,
      "completions/mean_length": 795.1808471679688,
      "completions/mean_terminated_length": 604.2243041992188,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 2.923615160349854,
      "grad_norm": 0.14472293853759766,
      "learning_rate": 1e-06,
      "loss": -0.0274,
      "num_tokens": 182442249.0,
      "reward": 0.5546875,
      "reward_std": 0.169460728764534,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 313
    },
    {
      "clip_ratio/high_max": 0.002174861918319948,
      "clip_ratio/high_mean": 0.0008585118412156589,
      "clip_ratio/low_mean": 0.0007678492438571993,
      "clip_ratio/low_min": 6.706338263029465e-05,
      "clip_ratio/region_mean": 0.0016263610741589218,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3953.0,
      "completions/mean_length": 731.5000610351562,
      "completions/mean_terminated_length": 598.79345703125,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 2.932944606413994,
      "grad_norm": 0.19401288032531738,
      "learning_rate": 1e-06,
      "loss": 0.0004,
      "num_tokens": 183047881.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.2113219052553177,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973995089530945,
      "step": 314
    },
    {
      "clip_ratio/high_max": 0.0018430207128403708,
      "clip_ratio/high_mean": 0.0007873630984249758,
      "clip_ratio/low_mean": 0.0005917095313634491,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001379072607960552,
      "completions/clipped_ratio": 0.0435267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3411.0,
      "completions/mean_length": 711.974365234375,
      "completions/mean_terminated_length": 557.9755249023438,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 2.942274052478134,
      "grad_norm": 0.16679641604423523,
      "learning_rate": 1e-06,
      "loss": -0.0196,
      "num_tokens": 183620778.0,
      "reward": 0.5703125,
      "reward_std": 0.18329225480556488,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 315
    },
    {
      "clip_ratio/high_max": 0.0018565994505479466,
      "clip_ratio/high_mean": 0.000691774314873328,
      "clip_ratio/low_mean": 0.0005710573195756297,
      "clip_ratio/low_min": 4.674640695156995e-05,
      "clip_ratio/region_mean": 0.0012628316508198623,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2706.0,
      "completions/mean_length": 626.6138916015625,
      "completions/mean_terminated_length": 551.4503784179688,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 2.951603498542274,
      "grad_norm": 0.17913031578063965,
      "learning_rate": 1e-06,
      "loss": -0.0041,
      "num_tokens": 184199472.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.18355371057987213,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 316
    },
    {
      "clip_ratio/high_max": 0.0019007811752089765,
      "clip_ratio/high_mean": 0.0007266173579409951,
      "clip_ratio/low_mean": 0.0006725180865032598,
      "clip_ratio/low_min": 3.9150353586592246e-05,
      "clip_ratio/region_mean": 0.0013991354608151596,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3775.0,
      "completions/mean_length": 740.2120971679688,
      "completions/mean_terminated_length": 615.9236450195312,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 2.960932944606414,
      "grad_norm": 0.17382897436618805,
      "learning_rate": 1e-06,
      "loss": 0.0038,
      "num_tokens": 184818942.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.22244179248809814,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 317
    },
    {
      "clip_ratio/high_max": 0.0022227749395824503,
      "clip_ratio/high_mean": 0.0008015793300728546,
      "clip_ratio/low_mean": 0.0006286112911766395,
      "clip_ratio/low_min": 1.3640331417263951e-05,
      "clip_ratio/region_mean": 0.0014301905903266743,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2776.0,
      "completions/mean_length": 617.2410888671875,
      "completions/mean_terminated_length": 529.675048828125,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 2.970262390670554,
      "grad_norm": 0.16559240221977234,
      "learning_rate": 1e-06,
      "loss": -0.0096,
      "num_tokens": 185371030.0,
      "reward": 0.645089328289032,
      "reward_std": 0.17701905965805054,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 318
    },
    {
      "clip_ratio/high_max": 0.0020268753651180305,
      "clip_ratio/high_mean": 0.0007223895572678884,
      "clip_ratio/low_mean": 0.0005586875458902796,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001281077100429684,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3976.0,
      "completions/mean_length": 707.771240234375,
      "completions/mean_terminated_length": 578.209716796875,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 2.979591836734694,
      "grad_norm": 0.17613056302070618,
      "learning_rate": 1e-06,
      "loss": 0.0188,
      "num_tokens": 185959809.0,
      "reward": 0.6015625,
      "reward_std": 0.18498292565345764,
      "rewards/verify_math_reward/mean": 0.6015625,
      "rewards/verify_math_reward/std": 0.48984986543655396,
      "step": 319
    },
    {
      "clip_ratio/high_max": 0.0018941805028589442,
      "clip_ratio/high_mean": 0.0007335241207329091,
      "clip_ratio/low_mean": 0.0004958659656040254,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012293900945223868,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4043.0,
      "completions/mean_length": 662.2310791015625,
      "completions/mean_terminated_length": 583.83447265625,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 2.9889212827988336,
      "grad_norm": 0.15626439452171326,
      "learning_rate": 1e-06,
      "loss": 0.0036,
      "num_tokens": 186567952.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.1782250851392746,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 320
    },
    {
      "clip_ratio/high_max": 0.0019759551250899676,
      "clip_ratio/high_mean": 0.0006801464633099386,
      "clip_ratio/low_mean": 0.00046020287391002057,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011403493299440015,
      "completions/clipped_ratio": 0.05965909090909094,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2948.0,
      "completions/mean_length": 757.54833984375,
      "completions/mean_terminated_length": 545.7432250976562,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 2.9982507288629736,
      "grad_norm": 0.15620289742946625,
      "learning_rate": 1e-06,
      "loss": -0.0242,
      "num_tokens": 187144377.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.14376364648342133,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 321
    },
    {
      "clip_ratio/high_max": 0.001879252122307662,
      "clip_ratio/high_mean": 0.0007739460415905342,
      "clip_ratio/low_mean": 0.0005718898901250213,
      "clip_ratio/low_min": 1.1040452591259964e-05,
      "clip_ratio/region_mean": 0.0013458359462674707,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3783.0,
      "completions/mean_length": 873.4129638671875,
      "completions/mean_terminated_length": 646.2532348632812,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 3.00932944606414,
      "grad_norm": 0.14187264442443848,
      "learning_rate": 1e-06,
      "loss": -0.0411,
      "num_tokens": 187775579.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.19692833721637726,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 322
    },
    {
      "clip_ratio/high_max": 0.0018968095246236771,
      "clip_ratio/high_mean": 0.0007414832389258663,
      "clip_ratio/low_mean": 0.0005251816055533709,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012666648508457001,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3351.0,
      "completions/mean_length": 741.9219360351562,
      "completions/mean_terminated_length": 593.3729858398438,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 3.01865889212828,
      "grad_norm": 0.170277401804924,
      "learning_rate": 1e-06,
      "loss": -0.0171,
      "num_tokens": 188367189.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.17833498120307922,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644899368286,
      "step": 323
    },
    {
      "clip_ratio/high_max": 0.001739174982503755,
      "clip_ratio/high_mean": 0.0005863178848812822,
      "clip_ratio/low_mean": 0.00048157983019336825,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010678976977942511,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3532.0,
      "completions/mean_length": 733.2902221679688,
      "completions/mean_terminated_length": 563.77490234375,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 3.02798833819242,
      "grad_norm": 0.15343502163887024,
      "learning_rate": 1e-06,
      "loss": -0.0118,
      "num_tokens": 188937753.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.16179178655147552,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321892857551575,
      "step": 324
    },
    {
      "clip_ratio/high_max": 0.0017487070144852623,
      "clip_ratio/high_mean": 0.0006142963193269679,
      "clip_ratio/low_mean": 0.0005022132290832815,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011165095456817653,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3831.0,
      "completions/mean_length": 664.40625,
      "completions/mean_terminated_length": 561.8528442382812,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 3.03731778425656,
      "grad_norm": 0.16153734922409058,
      "learning_rate": 1e-06,
      "loss": -0.018,
      "num_tokens": 189517157.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.15932045876979828,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 325
    },
    {
      "clip_ratio/high_max": 0.0015828797986614518,
      "clip_ratio/high_mean": 0.0005282797174004372,
      "clip_ratio/low_mean": 0.00034335495331561106,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008716346837900346,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2692.0,
      "completions/mean_length": 670.6138916015625,
      "completions/mean_terminated_length": 535.5057983398438,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 3.0466472303206995,
      "grad_norm": 0.13909634947776794,
      "learning_rate": 1e-06,
      "loss": -0.0206,
      "num_tokens": 190059027.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.12869539856910706,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 326
    },
    {
      "clip_ratio/high_max": 0.001953350627445616,
      "clip_ratio/high_mean": 0.0006827437955507776,
      "clip_ratio/low_mean": 0.0005384850701375399,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012212288565933704,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3505.0,
      "completions/mean_length": 766.1797485351562,
      "completions/mean_terminated_length": 598.3223876953125,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 3.0559766763848395,
      "grad_norm": 0.16334910690784454,
      "learning_rate": 1e-06,
      "loss": -0.0051,
      "num_tokens": 190661548.0,
      "reward": 0.59375,
      "reward_std": 0.17979811131954193,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 327
    },
    {
      "clip_ratio/high_max": 0.0017032836185535416,
      "clip_ratio/high_mean": 0.0006889922497066436,
      "clip_ratio/low_mean": 0.0005080533292129985,
      "clip_ratio/low_min": 1.1794678357546218e-05,
      "clip_ratio/region_mean": 0.001197045603475999,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3907.0,
      "completions/mean_length": 688.1049194335938,
      "completions/mean_terminated_length": 565.9722290039062,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 3.0653061224489795,
      "grad_norm": 0.19825489819049835,
      "learning_rate": 1e-06,
      "loss": -0.0169,
      "num_tokens": 191243866.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.1688220202922821,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 328
    },
    {
      "clip_ratio/high_max": 0.0013345623265195172,
      "clip_ratio/high_mean": 0.0004686905158450827,
      "clip_ratio/low_mean": 0.0005615261945877137,
      "clip_ratio/low_min": 2.133105772372801e-05,
      "clip_ratio/region_mean": 0.0010302167138434015,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2409.0,
      "completions/mean_length": 656.3917846679688,
      "completions/mean_terminated_length": 524.8656005859375,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 3.0746355685131195,
      "grad_norm": 0.15823489427566528,
      "learning_rate": 1e-06,
      "loss": -0.0091,
      "num_tokens": 191787633.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.11693863570690155,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 329
    },
    {
      "clip_ratio/high_max": 0.001635613909456879,
      "clip_ratio/high_mean": 0.0006411001641026814,
      "clip_ratio/low_mean": 0.0006498393977381056,
      "clip_ratio/low_min": 2.5415084564883728e-05,
      "clip_ratio/region_mean": 0.0012909395663882606,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4047.0,
      "completions/mean_length": 717.8895263671875,
      "completions/mean_terminated_length": 596.8242797851562,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 3.0839650145772595,
      "grad_norm": 0.1525290608406067,
      "learning_rate": 1e-06,
      "loss": -0.0027,
      "num_tokens": 192399622.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.15778063237667084,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 330
    },
    {
      "clip_ratio/high_max": 0.002810267309541814,
      "clip_ratio/high_mean": 0.0009091607262234902,
      "clip_ratio/low_mean": 0.0005943442874922766,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0015035050055303145,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3285.0,
      "completions/mean_length": 698.388427734375,
      "completions/mean_terminated_length": 552.0419311523438,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 3.0932944606413995,
      "grad_norm": 0.1841270923614502,
      "learning_rate": 1e-06,
      "loss": -0.0076,
      "num_tokens": 192960602.0,
      "reward": 0.6640625,
      "reward_std": 0.18806366622447968,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 331
    },
    {
      "clip_ratio/high_max": 0.0013865030960005242,
      "clip_ratio/high_mean": 0.000526547885783657,
      "clip_ratio/low_mean": 0.00039444234425900504,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009209902309521567,
      "completions/clipped_ratio": 0.0323660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4001.0,
      "completions/mean_length": 673.2689819335938,
      "completions/mean_terminated_length": 558.7831420898438,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 3.1026239067055394,
      "grad_norm": 0.14227311313152313,
      "learning_rate": 1e-06,
      "loss": -0.0026,
      "num_tokens": 193532283.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.13996823132038116,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 332
    },
    {
      "clip_ratio/high_max": 0.0016808614018373191,
      "clip_ratio/high_mean": 0.0005795997749373782,
      "clip_ratio/low_mean": 0.0004085123721324635,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009881121695798356,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2650.0,
      "completions/mean_length": 593.1730346679688,
      "completions/mean_terminated_length": 521.361083984375,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 3.1119533527696794,
      "grad_norm": 0.14072465896606445,
      "learning_rate": 1e-06,
      "loss": 0.0003,
      "num_tokens": 194085366.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.13587605953216553,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 333
    },
    {
      "clip_ratio/high_max": 0.0016859312891028821,
      "clip_ratio/high_mean": 0.000606731589869014,
      "clip_ratio/low_mean": 0.0006843964201834751,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012911280027765315,
      "completions/clipped_ratio": 0.046875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2599.0,
      "completions/mean_length": 730.029052734375,
      "completions/mean_terminated_length": 564.4894409179688,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 3.1212827988338194,
      "grad_norm": 0.1528729945421219,
      "learning_rate": 1e-06,
      "loss": 0.0052,
      "num_tokens": 194668272.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.16645807027816772,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924395978450775,
      "step": 334
    },
    {
      "clip_ratio/high_max": 0.0016520459721505176,
      "clip_ratio/high_mean": 0.0006331761796900537,
      "clip_ratio/low_mean": 0.0005587463592746644,
      "clip_ratio/low_min": 1.7099862816394307e-05,
      "clip_ratio/region_mean": 0.001191922536236234,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3052.0,
      "completions/mean_length": 594.6585083007812,
      "completions/mean_terminated_length": 518.802734375,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 3.130612244897959,
      "grad_norm": 0.17078544199466705,
      "learning_rate": 1e-06,
      "loss": 0.0051,
      "num_tokens": 195204806.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.16244256496429443,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 335
    },
    {
      "clip_ratio/high_max": 0.001948820790858008,
      "clip_ratio/high_mean": 0.0006329362786345882,
      "clip_ratio/low_mean": 0.0006931475700184819,
      "clip_ratio/low_min": 3.0884128136676736e-05,
      "clip_ratio/region_mean": 0.0013260838386486284,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2983.0,
      "completions/mean_length": 683.0938110351562,
      "completions/mean_terminated_length": 585.1343383789062,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 3.139941690962099,
      "grad_norm": 0.16676604747772217,
      "learning_rate": 1e-06,
      "loss": -0.0021,
      "num_tokens": 195804794.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.18892493844032288,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 336
    },
    {
      "clip_ratio/high_max": 0.0018137805673177354,
      "clip_ratio/high_mean": 0.0005947059671598254,
      "clip_ratio/low_mean": 0.0005975330741421203,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011922390367544722,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2237.0,
      "completions/mean_length": 670.575927734375,
      "completions/mean_terminated_length": 523.0314331054688,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 3.149271137026239,
      "grad_norm": 0.16413962841033936,
      "learning_rate": 1e-06,
      "loss": -0.0215,
      "num_tokens": 196341382.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.13685427606105804,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 337
    },
    {
      "clip_ratio/high_max": 0.0020174984420009423,
      "clip_ratio/high_mean": 0.000718235234671738,
      "clip_ratio/low_mean": 0.0005036646816733992,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012218999254400842,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4047.0,
      "completions/mean_length": 884.7723388671875,
      "completions/mean_terminated_length": 641.9063720703125,
      "completions/min_length": 93.0,
      "completions/min_terminated_length": 93.0,
      "epoch": 3.158600583090379,
      "grad_norm": 0.15254896879196167,
      "learning_rate": 1e-06,
      "loss": -0.0145,
      "num_tokens": 196990674.0,
      "reward": 0.494419664144516,
      "reward_std": 0.1596214473247528,
      "rewards/verify_math_reward/mean": 0.4944196343421936,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 338
    },
    {
      "clip_ratio/high_max": 0.0017272997283726,
      "clip_ratio/high_mean": 0.000542580040928442,
      "clip_ratio/low_mean": 0.0003449479145274381,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008875279399944702,
      "completions/clipped_ratio": 0.046875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3526.0,
      "completions/mean_length": 761.5792846679688,
      "completions/mean_terminated_length": 597.59130859375,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 3.167930029154519,
      "grad_norm": 0.14441409707069397,
      "learning_rate": 1e-06,
      "loss": -0.0018,
      "num_tokens": 197593545.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.14534735679626465,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 339
    },
    {
      "clip_ratio/high_max": 0.00170107834492228,
      "clip_ratio/high_mean": 0.0005152284193172818,
      "clip_ratio/low_mean": 0.000506231612234842,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010214600151812192,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3342.0,
      "completions/mean_length": 729.700927734375,
      "completions/mean_terminated_length": 572.3971557617188,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 3.177259475218659,
      "grad_norm": 0.1728869378566742,
      "learning_rate": 1e-06,
      "loss": -0.0024,
      "num_tokens": 198168413.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.16506867110729218,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 340
    },
    {
      "clip_ratio/high_max": 0.0016669321157678496,
      "clip_ratio/high_mean": 0.0006166034363559447,
      "clip_ratio/low_mean": 0.0004913312923235935,
      "clip_ratio/low_min": 1.768033871485386e-05,
      "clip_ratio/region_mean": 0.0011079347132181283,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4016.0,
      "completions/mean_length": 697.200927734375,
      "completions/mean_terminated_length": 587.5621948242188,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 3.186588921282799,
      "grad_norm": 0.1714445948600769,
      "learning_rate": 1e-06,
      "loss": -0.002,
      "num_tokens": 198761089.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.15785479545593262,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 341
    },
    {
      "clip_ratio/high_max": 0.0018787250737659633,
      "clip_ratio/high_mean": 0.0007277342265297193,
      "clip_ratio/low_mean": 0.0005612047139038623,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012889389363408554,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3681.0,
      "completions/mean_length": 762.3873291015625,
      "completions/mean_terminated_length": 606.6109619140625,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 3.195918367346939,
      "grad_norm": 0.15971846878528595,
      "learning_rate": 1e-06,
      "loss": -0.0015,
      "num_tokens": 199373820.0,
      "reward": 0.582589328289032,
      "reward_std": 0.1716071218252182,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.493407279253006,
      "step": 342
    },
    {
      "clip_ratio/high_max": 0.0019077946344623342,
      "clip_ratio/high_mean": 0.0007565264622826362,
      "clip_ratio/low_mean": 0.0004942814248352079,
      "clip_ratio/low_min": 1.1316313248244114e-05,
      "clip_ratio/region_mean": 0.0012508078943938017,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4024.0,
      "completions/mean_length": 684.2467041015625,
      "completions/mean_terminated_length": 578.2427978515625,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 3.205247813411079,
      "grad_norm": 0.16806165874004364,
      "learning_rate": 1e-06,
      "loss": 0.0085,
      "num_tokens": 199962417.0,
      "reward": 0.637276828289032,
      "reward_std": 0.15710733830928802,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 343
    },
    {
      "clip_ratio/high_max": 0.0015897958546702284,
      "clip_ratio/high_mean": 0.0005700374276784714,
      "clip_ratio/low_mean": 0.0005594242948063766,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001129461725213332,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2293.0,
      "completions/mean_length": 711.3158569335938,
      "completions/mean_terminated_length": 590.0150146484375,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 3.2145772594752184,
      "grad_norm": 0.15745463967323303,
      "learning_rate": 1e-06,
      "loss": -0.0122,
      "num_tokens": 200561340.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.15503577888011932,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 344
    },
    {
      "clip_ratio/high_max": 0.0017465214186813682,
      "clip_ratio/high_mean": 0.0007276929718500469,
      "clip_ratio/low_mean": 0.0005883022058696952,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013159951849956997,
      "completions/clipped_ratio": 0.0502232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2612.0,
      "completions/mean_length": 787.1819458007812,
      "completions/mean_terminated_length": 612.2150268554688,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 3.2239067055393584,
      "grad_norm": 0.16699855029582977,
      "learning_rate": 1e-06,
      "loss": -0.0143,
      "num_tokens": 201177119.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.17239172756671906,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 345
    },
    {
      "clip_ratio/high_max": 0.00131191932814545,
      "clip_ratio/high_mean": 0.00046841485527693294,
      "clip_ratio/low_mean": 0.00036787522867598454,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008362900734937284,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3025.0,
      "completions/mean_length": 629.9754638671875,
      "completions/mean_terminated_length": 546.7908325195312,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 3.2332361516034984,
      "grad_norm": 0.17488166689872742,
      "learning_rate": 1e-06,
      "loss": -0.0013,
      "num_tokens": 201739513.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.11836010217666626,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.4628615975379944,
      "step": 346
    },
    {
      "clip_ratio/high_max": 0.0015291980453184806,
      "clip_ratio/high_mean": 0.0004885729395027738,
      "clip_ratio/low_mean": 0.0003527211426899157,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008412940915150102,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3835.0,
      "completions/mean_length": 691.177490234375,
      "completions/mean_terminated_length": 569.1549072265625,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 3.2425655976676384,
      "grad_norm": 0.1976059377193451,
      "learning_rate": 1e-06,
      "loss": -0.0016,
      "num_tokens": 202323064.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.12576760351657867,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161848425865173,
      "step": 347
    },
    {
      "clip_ratio/high_max": 0.002212184146628715,
      "clip_ratio/high_mean": 0.0008297939384647179,
      "clip_ratio/low_mean": 0.0005541007749343407,
      "clip_ratio/low_min": 1.3337601558305323e-05,
      "clip_ratio/region_mean": 0.001383894748869352,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4032.0,
      "completions/mean_length": 645.5814819335938,
      "completions/mean_terminated_length": 538.3762817382812,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 3.2518950437317784,
      "grad_norm": 0.1889650672674179,
      "learning_rate": 1e-06,
      "loss": -0.0107,
      "num_tokens": 202882113.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.18054921925067902,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 348
    },
    {
      "clip_ratio/high_max": 0.0015015474346000701,
      "clip_ratio/high_mean": 0.0005599265214186744,
      "clip_ratio/low_mean": 0.0006452595171140274,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012051860612700693,
      "completions/clipped_ratio": 0.0580357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4080.0,
      "completions/mean_length": 843.4152221679688,
      "completions/mean_terminated_length": 643.0189819335938,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 3.2612244897959184,
      "grad_norm": 0.16263212263584137,
      "learning_rate": 1e-06,
      "loss": 0.0009,
      "num_tokens": 203519669.0,
      "reward": 0.4799107313156128,
      "reward_std": 0.17321588099002838,
      "rewards/verify_math_reward/mean": 0.4799107015132904,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 349
    },
    {
      "clip_ratio/high_max": 0.0015449030106537975,
      "clip_ratio/high_mean": 0.0005975261556159239,
      "clip_ratio/low_mean": 0.0005389400521380594,
      "clip_ratio/low_min": 3.970774923800491e-05,
      "clip_ratio/region_mean": 0.0011364662168489303,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4019.0,
      "completions/mean_length": 683.8873291015625,
      "completions/mean_terminated_length": 565.6847534179688,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 3.2705539358600584,
      "grad_norm": 0.14683841168880463,
      "learning_rate": 1e-06,
      "loss": -0.0072,
      "num_tokens": 204102016.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.15405938029289246,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 350
    },
    {
      "clip_ratio/high_max": 0.0017227121534233447,
      "clip_ratio/high_mean": 0.0005658843256242108,
      "clip_ratio/low_mean": 0.00048291549046552973,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010487997969903518,
      "completions/clipped_ratio": 0.056919642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1883.0,
      "completions/mean_length": 751.5647583007812,
      "completions/mean_terminated_length": 549.7112426757812,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 3.2798833819241984,
      "grad_norm": 0.2624254822731018,
      "learning_rate": 1e-06,
      "loss": -0.0104,
      "num_tokens": 204653178.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.139596626162529,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 351
    },
    {
      "clip_ratio/high_max": 0.002103293405525619,
      "clip_ratio/high_mean": 0.0008459727014269447,
      "clip_ratio/low_mean": 0.0005525255082829972,
      "clip_ratio/low_min": 1.8603959688334726e-05,
      "clip_ratio/region_mean": 0.0013984982288093306,
      "completions/clipped_ratio": 0.052455357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3596.0,
      "completions/mean_length": 829.4297485351562,
      "completions/mean_terminated_length": 648.5948486328125,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 3.2892128279883384,
      "grad_norm": 0.2469792664051056,
      "learning_rate": 1e-06,
      "loss": -0.0298,
      "num_tokens": 205298483.0,
      "reward": 0.5859375,
      "reward_std": 0.19163267314434052,
      "rewards/verify_math_reward/mean": 0.5859375,
      "rewards/verify_math_reward/std": 0.4928344786167145,
      "step": 352
    },
    {
      "clip_ratio/high_max": 0.002046771056484431,
      "clip_ratio/high_mean": 0.0007821938470442547,
      "clip_ratio/low_mean": 0.0005993165068503004,
      "clip_ratio/low_min": 4.450034612091258e-05,
      "clip_ratio/region_mean": 0.0013815103739034384,
      "completions/clipped_ratio": 0.046875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1960.0,
      "completions/mean_length": 738.4074096679688,
      "completions/mean_terminated_length": 573.2798461914062,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 3.298542274052478,
      "grad_norm": 0.18035072088241577,
      "learning_rate": 1e-06,
      "loss": -0.0097,
      "num_tokens": 205881584.0,
      "reward": 0.640625,
      "reward_std": 0.1795370876789093,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 353
    },
    {
      "clip_ratio/high_max": 0.0022992012891336344,
      "clip_ratio/high_mean": 0.0008977718935057055,
      "clip_ratio/low_mean": 0.0005703734134385741,
      "clip_ratio/low_min": 3.3743564927135594e-05,
      "clip_ratio/region_mean": 0.0014681453103548847,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2866.0,
      "completions/mean_length": 713.4152221679688,
      "completions/mean_terminated_length": 592.1895751953125,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 3.307871720116618,
      "grad_norm": 0.1876998245716095,
      "learning_rate": 1e-06,
      "loss": 0.0073,
      "num_tokens": 206484748.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.19425947964191437,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 354
    },
    {
      "clip_ratio/high_max": 0.001361861559416866,
      "clip_ratio/high_mean": 0.00046019937053642934,
      "clip_ratio/low_mean": 0.00038518078054039506,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008453801601717714,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1977.0,
      "completions/mean_length": 601.1495971679688,
      "completions/mean_terminated_length": 500.8381042480469,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 3.317201166180758,
      "grad_norm": 0.14762645959854126,
      "learning_rate": 1e-06,
      "loss": -0.0044,
      "num_tokens": 206998346.0,
      "reward": 0.707589328289032,
      "reward_std": 0.12253419309854507,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 355
    },
    {
      "clip_ratio/high_max": 0.0020514542047749273,
      "clip_ratio/high_mean": 0.0008852325827319873,
      "clip_ratio/low_mean": 0.0005880790904484456,
      "clip_ratio/low_min": 1.4937858395569492e-05,
      "clip_ratio/region_mean": 0.0014733116477145813,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3683.0,
      "completions/mean_length": 721.4219360351562,
      "completions/mean_terminated_length": 571.9650268554688,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 3.326530612244898,
      "grad_norm": 0.19541537761688232,
      "learning_rate": 1e-06,
      "loss": -0.009,
      "num_tokens": 207579852.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.20410872995853424,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 356
    },
    {
      "clip_ratio/high_max": 0.0015374102658824995,
      "clip_ratio/high_mean": 0.0005859010707354173,
      "clip_ratio/low_mean": 0.0004981515567124006,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010840526410902385,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3641.0,
      "completions/mean_length": 748.0814819335938,
      "completions/mean_terminated_length": 616.0289916992188,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 3.335860058309038,
      "grad_norm": 0.1314224898815155,
      "learning_rate": 1e-06,
      "loss": -0.0121,
      "num_tokens": 208207973.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.14887316524982452,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 357
    },
    {
      "clip_ratio/high_max": 0.0016378237523895223,
      "clip_ratio/high_mean": 0.0006037754192220746,
      "clip_ratio/low_mean": 0.00047187791460601147,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001075653333828086,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2169.0,
      "completions/mean_length": 656.7522583007812,
      "completions/mean_terminated_length": 529.3726806640625,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 3.345189504373178,
      "grad_norm": 0.16750304400920868,
      "learning_rate": 1e-06,
      "loss": -0.0146,
      "num_tokens": 208749119.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.15053103864192963,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 358
    },
    {
      "clip_ratio/high_max": 0.001733257890009554,
      "clip_ratio/high_mean": 0.0007255245109263342,
      "clip_ratio/low_mean": 0.0005510935743586742,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001276618058909662,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3990.0,
      "completions/mean_length": 762.9185791015625,
      "completions/mean_terminated_length": 615.299560546875,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 3.354518950437318,
      "grad_norm": 0.1626165807247162,
      "learning_rate": 1e-06,
      "loss": -0.015,
      "num_tokens": 209371766.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.1692011058330536,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924395978450775,
      "step": 359
    },
    {
      "clip_ratio/high_max": 0.001696944389550481,
      "clip_ratio/high_mean": 0.000568242248846218,
      "clip_ratio/low_mean": 0.0005102288118905562,
      "clip_ratio/low_min": 1.1991557585133705e-05,
      "clip_ratio/region_mean": 0.0010784710648295004,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3971.0,
      "completions/mean_length": 750.646240234375,
      "completions/mean_terminated_length": 582.005859375,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 3.363848396501458,
      "grad_norm": 0.1634666621685028,
      "learning_rate": 1e-06,
      "loss": -0.0007,
      "num_tokens": 209959129.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.15488353371620178,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 360
    },
    {
      "clip_ratio/high_max": 0.0017860847256088164,
      "clip_ratio/high_mean": 0.000683918433423969,
      "clip_ratio/low_mean": 0.000540567531970737,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012244859753991477,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2937.0,
      "completions/mean_length": 655.0178833007812,
      "completions/mean_terminated_length": 531.6994018554688,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 3.373177842565598,
      "grad_norm": 0.19383962452411652,
      "learning_rate": 1e-06,
      "loss": -0.025,
      "num_tokens": 210519769.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.15931904315948486,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 361
    },
    {
      "clip_ratio/high_max": 0.0021199771654210053,
      "clip_ratio/high_mean": 0.0008998619014164433,
      "clip_ratio/low_mean": 0.000567416234389384,
      "clip_ratio/low_min": 1.3975849469716195e-05,
      "clip_ratio/region_mean": 0.0014672781180706806,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3841.0,
      "completions/mean_length": 688.5949096679688,
      "completions/mean_terminated_length": 529.3703002929688,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 3.3825072886297374,
      "grad_norm": 0.4614749252796173,
      "learning_rate": 1e-06,
      "loss": -0.0146,
      "num_tokens": 211059062.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.1775440275669098,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 362
    },
    {
      "clip_ratio/high_max": 0.001744619985402096,
      "clip_ratio/high_mean": 0.0006723825445078546,
      "clip_ratio/low_mean": 0.00045334602782531874,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011257286096224561,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3811.0,
      "completions/mean_length": 699.0892944335938,
      "completions/mean_terminated_length": 597.5723876953125,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 3.3918367346938774,
      "grad_norm": 0.16083276271820068,
      "learning_rate": 1e-06,
      "loss": -0.0097,
      "num_tokens": 211669046.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.17333240807056427,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689778685569763,
      "step": 363
    },
    {
      "clip_ratio/high_max": 0.0016588986036367714,
      "clip_ratio/high_mean": 0.0005556809283007169,
      "clip_ratio/low_mean": 0.000585030695219757,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011407116217014845,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3009.0,
      "completions/mean_length": 692.0625610351562,
      "completions/mean_terminated_length": 545.4435424804688,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 3.4011661807580174,
      "grad_norm": 0.1567852795124054,
      "learning_rate": 1e-06,
      "loss": -0.0167,
      "num_tokens": 212235430.0,
      "reward": 0.6015625,
      "reward_std": 0.14402396976947784,
      "rewards/verify_math_reward/mean": 0.6015625,
      "rewards/verify_math_reward/std": 0.48984986543655396,
      "step": 364
    },
    {
      "clip_ratio/high_max": 0.001806083724659402,
      "clip_ratio/high_mean": 0.0006226782388694119,
      "clip_ratio/low_mean": 0.0004972527485733735,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011199309992662165,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3987.0,
      "completions/mean_length": 699.0256958007812,
      "completions/mean_terminated_length": 573.2117919921875,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 3.4104956268221573,
      "grad_norm": 0.17135797441005707,
      "learning_rate": 1e-06,
      "loss": -0.0129,
      "num_tokens": 212815645.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.160858154296875,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 365
    },
    {
      "clip_ratio/high_max": 0.002020967825956177,
      "clip_ratio/high_mean": 0.0008052228022279451,
      "clip_ratio/low_mean": 0.0006546931972479797,
      "clip_ratio/low_min": 6.945713084860472e-05,
      "clip_ratio/region_mean": 0.0014599159840145148,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2577.0,
      "completions/mean_length": 745.9564819335938,
      "completions/mean_terminated_length": 589.412353515625,
      "completions/min_length": 177.0,
      "completions/min_terminated_length": 177.0,
      "epoch": 3.4198250728862973,
      "grad_norm": 0.17605097591876984,
      "learning_rate": 1e-06,
      "loss": -0.0049,
      "num_tokens": 213422182.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.188966304063797,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 366
    },
    {
      "clip_ratio/high_max": 0.001761342540703481,
      "clip_ratio/high_mean": 0.0006754804999218322,
      "clip_ratio/low_mean": 0.0005779823604825651,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012534628731373232,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4049.0,
      "completions/mean_length": 767.8092041015625,
      "completions/mean_terminated_length": 600.0339965820312,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 3.4291545189504373,
      "grad_norm": 0.22071969509124756,
      "learning_rate": 1e-06,
      "loss": -0.0145,
      "num_tokens": 214032219.0,
      "reward": 0.5703125,
      "reward_std": 0.17127405107021332,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 367
    },
    {
      "clip_ratio/high_max": 0.002016289272432914,
      "clip_ratio/high_mean": 0.0007822973329894012,
      "clip_ratio/low_mean": 0.0005686713811883237,
      "clip_ratio/low_min": 1.843114114308264e-05,
      "clip_ratio/region_mean": 0.0013509687087207567,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3529.0,
      "completions/mean_length": 641.6908569335938,
      "completions/mean_terminated_length": 538.4586181640625,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 3.4384839650145773,
      "grad_norm": 0.17655901610851288,
      "learning_rate": 1e-06,
      "loss": -0.0027,
      "num_tokens": 214584758.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.1562718003988266,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 368
    },
    {
      "clip_ratio/high_max": 0.0018473334785085171,
      "clip_ratio/high_mean": 0.000736389138182858,
      "clip_ratio/low_mean": 0.0005798566762678092,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001316245790803805,
      "completions/clipped_ratio": 0.0546875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3853.0,
      "completions/mean_length": 747.1964721679688,
      "completions/mean_terminated_length": 553.4639892578125,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 3.4478134110787173,
      "grad_norm": 0.1764877438545227,
      "learning_rate": 1e-06,
      "loss": -0.0118,
      "num_tokens": 215153894.0,
      "reward": 0.578125,
      "reward_std": 0.17141534388065338,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 369
    },
    {
      "clip_ratio/high_max": 0.0019431889377301559,
      "clip_ratio/high_mean": 0.0007811395007593092,
      "clip_ratio/low_mean": 0.0004999987040719134,
      "clip_ratio/low_min": 2.7250073799223173e-05,
      "clip_ratio/region_mean": 0.0012811382148356643,
      "completions/clipped_ratio": 0.0502232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3659.0,
      "completions/mean_length": 759.497802734375,
      "completions/mean_terminated_length": 583.0669555664062,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 3.4571428571428573,
      "grad_norm": 0.1896611750125885,
      "learning_rate": 1e-06,
      "loss": -0.0146,
      "num_tokens": 215735948.0,
      "reward": 0.582589328289032,
      "reward_std": 0.15631386637687683,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.493407279253006,
      "step": 370
    },
    {
      "clip_ratio/high_max": 0.001472057312639663,
      "clip_ratio/high_mean": 0.0005646050176437711,
      "clip_ratio/low_mean": 0.0006057319351384649,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011703369455062784,
      "completions/clipped_ratio": 0.0323660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2850.0,
      "completions/mean_length": 637.8939819335938,
      "completions/mean_terminated_length": 522.2249145507812,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 3.466472303206997,
      "grad_norm": 0.19203650951385498,
      "learning_rate": 1e-06,
      "loss": -0.0115,
      "num_tokens": 216273925.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.17930956184864044,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 371
    },
    {
      "clip_ratio/high_max": 0.002444472564093303,
      "clip_ratio/high_mean": 0.0008564699528506026,
      "clip_ratio/low_mean": 0.0005713572008971823,
      "clip_ratio/low_min": 5.474257341120392e-05,
      "clip_ratio/region_mean": 0.0014278271701186895,
      "completions/clipped_ratio": 0.049107142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3622.0,
      "completions/mean_length": 741.2332763671875,
      "completions/mean_terminated_length": 567.982421875,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 3.4758017492711373,
      "grad_norm": 0.19386622309684753,
      "learning_rate": 1e-06,
      "loss": -0.014,
      "num_tokens": 216853838.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.18768569827079773,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 372
    },
    {
      "clip_ratio/high_max": 0.001838492895331001,
      "clip_ratio/high_mean": 0.0006286235993684386,
      "clip_ratio/low_mean": 0.0004937572757626185,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011223808833165094,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3755.0,
      "completions/mean_length": 713.2857666015625,
      "completions/mean_terminated_length": 555.2149658203125,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 3.485131195335277,
      "grad_norm": 0.16348233819007874,
      "learning_rate": 1e-06,
      "loss": 0.0125,
      "num_tokens": 217418470.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.13917727768421173,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 373
    },
    {
      "clip_ratio/high_max": 0.0015413453347719042,
      "clip_ratio/high_mean": 0.0005610847715615819,
      "clip_ratio/low_mean": 0.0005258913606667193,
      "clip_ratio/low_min": 2.9050039302092046e-05,
      "clip_ratio/region_mean": 0.0010869761172216386,
      "completions/clipped_ratio": 0.0457589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2568.0,
      "completions/mean_length": 715.450927734375,
      "completions/mean_terminated_length": 553.3427124023438,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 3.494460641399417,
      "grad_norm": 0.18832284212112427,
      "learning_rate": 1e-06,
      "loss": -0.0157,
      "num_tokens": 217989162.0,
      "reward": 0.606026828289032,
      "reward_std": 0.16307058930397034,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 374
    },
    {
      "clip_ratio/high_max": 0.0014378242558450438,
      "clip_ratio/high_mean": 0.0004859341643168591,
      "clip_ratio/low_mean": 0.0004664748057621182,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009524089491605991,
      "completions/clipped_ratio": 0.0457589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2782.0,
      "completions/mean_length": 721.2176513671875,
      "completions/mean_terminated_length": 559.385986328125,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 3.503790087463557,
      "grad_norm": 0.16173528134822845,
      "learning_rate": 1e-06,
      "loss": -0.0233,
      "num_tokens": 218545413.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.13504233956336975,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 375
    },
    {
      "clip_ratio/high_max": 0.0018496222655812744,
      "clip_ratio/high_mean": 0.0007505446428694995,
      "clip_ratio/low_mean": 0.0005054371913502109,
      "clip_ratio/low_min": 6.251193826756207e-05,
      "clip_ratio/region_mean": 0.0012559818023873959,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3944.0,
      "completions/mean_length": 734.880615234375,
      "completions/mean_terminated_length": 602.3074340820312,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 3.513119533527697,
      "grad_norm": 0.18296431005001068,
      "learning_rate": 1e-06,
      "loss": -0.0127,
      "num_tokens": 219153018.0,
      "reward": 0.6484375,
      "reward_std": 0.1685284972190857,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 376
    },
    {
      "clip_ratio/high_max": 0.00206367682403652,
      "clip_ratio/high_mean": 0.0007036374481685925,
      "clip_ratio/low_mean": 0.000642717122900649,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013463545838021673,
      "completions/clipped_ratio": 0.0457589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3806.0,
      "completions/mean_length": 760.6295166015625,
      "completions/mean_terminated_length": 600.6876831054688,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 3.522448979591837,
      "grad_norm": 0.1939503252506256,
      "learning_rate": 1e-06,
      "loss": -0.0124,
      "num_tokens": 219762902.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.19659315049648285,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 377
    },
    {
      "clip_ratio/high_max": 0.001997565930651035,
      "clip_ratio/high_mean": 0.0006845299285487272,
      "clip_ratio/low_mean": 0.0004570088676700834,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011415387962188106,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4068.0,
      "completions/mean_length": 773.9631958007812,
      "completions/mean_terminated_length": 577.6253051757812,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 3.5317784256559768,
      "grad_norm": 0.1786443591117859,
      "learning_rate": 1e-06,
      "loss": 0.0011,
      "num_tokens": 220346429.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.16927777230739594,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 378
    },
    {
      "clip_ratio/high_max": 0.0020954741012246814,
      "clip_ratio/high_mean": 0.000722040294931503,
      "clip_ratio/low_mean": 0.0006658451802650234,
      "clip_ratio/low_min": 1.5451174476766028e-05,
      "clip_ratio/region_mean": 0.0013878854806534946,
      "completions/clipped_ratio": 0.0401785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3858.0,
      "completions/mean_length": 703.0636596679688,
      "completions/mean_terminated_length": 561.03369140625,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 3.5411078717201168,
      "grad_norm": 0.21019017696380615,
      "learning_rate": 1e-06,
      "loss": -0.0123,
      "num_tokens": 220913558.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.17235782742500305,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.47737622261047363,
      "step": 379
    },
    {
      "clip_ratio/high_max": 0.0021704917089664377,
      "clip_ratio/high_mean": 0.0008289779198094038,
      "clip_ratio/low_mean": 0.0005503197139660188,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013792976242257282,
      "completions/clipped_ratio": 0.056919642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4065.0,
      "completions/mean_length": 758.1506958007812,
      "completions/mean_terminated_length": 556.6946411132812,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 3.5504373177842563,
      "grad_norm": 0.1875956654548645,
      "learning_rate": 1e-06,
      "loss": -0.013,
      "num_tokens": 221484045.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.16822652518749237,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924395978450775,
      "step": 380
    },
    {
      "clip_ratio/high_max": 0.0017003353750624228,
      "clip_ratio/high_mean": 0.0006937202306289691,
      "clip_ratio/low_mean": 0.0005314323825587053,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012251526059117168,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3164.0,
      "completions/mean_length": 674.8828125,
      "completions/mean_terminated_length": 539.943115234375,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 3.5597667638483967,
      "grad_norm": 0.19282668828964233,
      "learning_rate": 1e-06,
      "loss": -0.0275,
      "num_tokens": 222032708.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.17795519530773163,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 381
    },
    {
      "clip_ratio/high_max": 0.002126079736626707,
      "clip_ratio/high_mean": 0.0008324973296112148,
      "clip_ratio/low_mean": 0.0005921694246353582,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014246667305997107,
      "completions/clipped_ratio": 0.0580357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1797.0,
      "completions/mean_length": 735.1629638671875,
      "completions/mean_terminated_length": 528.09716796875,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 3.5690962099125363,
      "grad_norm": 0.20518554747104645,
      "learning_rate": 1e-06,
      "loss": -0.0371,
      "num_tokens": 222569254.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.18193930387496948,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791128396987915,
      "step": 382
    },
    {
      "clip_ratio/high_max": 0.0016663078095007222,
      "clip_ratio/high_mean": 0.0006392496961780125,
      "clip_ratio/low_mean": 0.0004713299344984989,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011105796256742906,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3017.0,
      "completions/mean_length": 669.7098388671875,
      "completions/mean_terminated_length": 551.0161743164062,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 3.5784256559766763,
      "grad_norm": 0.182663694024086,
      "learning_rate": 1e-06,
      "loss": -0.0088,
      "num_tokens": 223142442.0,
      "reward": 0.629464328289032,
      "reward_std": 0.1594713181257248,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 383
    },
    {
      "clip_ratio/high_max": 0.0018950456214952283,
      "clip_ratio/high_mean": 0.0006564268132933648,
      "clip_ratio/low_mean": 0.0005129431101522641,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011693699016177561,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1848.0,
      "completions/mean_length": 691.3158569335938,
      "completions/mean_terminated_length": 519.6846313476562,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 3.5877551020408163,
      "grad_norm": 0.17683209478855133,
      "learning_rate": 1e-06,
      "loss": -0.0094,
      "num_tokens": 223679253.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.14597675204277039,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 384
    },
    {
      "clip_ratio/high_max": 0.0014951238190406002,
      "clip_ratio/high_mean": 0.0005862280304427259,
      "clip_ratio/low_mean": 0.0006051687805666006,
      "clip_ratio/low_min": 2.9754819479421712e-05,
      "clip_ratio/region_mean": 0.0011913968191947788,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2948.0,
      "completions/mean_length": 675.0692138671875,
      "completions/mean_terminated_length": 536.0069580078125,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 3.5970845481049563,
      "grad_norm": 0.1987566351890564,
      "learning_rate": 1e-06,
      "loss": -0.0011,
      "num_tokens": 224234027.0,
      "reward": 0.606026828289032,
      "reward_std": 0.15458042919635773,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 385
    },
    {
      "clip_ratio/high_max": 0.001807525932235876,
      "clip_ratio/high_mean": 0.0007027957963146036,
      "clip_ratio/low_mean": 0.0005485896253958344,
      "clip_ratio/low_min": 2.8737048523908015e-05,
      "clip_ratio/region_mean": 0.0012513854071585229,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2665.0,
      "completions/mean_length": 698.5178833007812,
      "completions/mean_terminated_length": 560.4088134765625,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 3.6064139941690962,
      "grad_norm": 0.169415682554245,
      "learning_rate": 1e-06,
      "loss": -0.0158,
      "num_tokens": 224799827.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.17153297364711761,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 386
    },
    {
      "clip_ratio/high_max": 0.0017395130635122769,
      "clip_ratio/high_mean": 0.0005787375794170657,
      "clip_ratio/low_mean": 0.0004144340646234923,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009931716522260103,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2454.0,
      "completions/mean_length": 700.8873291015625,
      "completions/mean_terminated_length": 575.1423950195312,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 3.6157434402332362,
      "grad_norm": 0.1567336469888687,
      "learning_rate": 1e-06,
      "loss": -0.0163,
      "num_tokens": 225387134.0,
      "reward": 0.6640625,
      "reward_std": 0.1749839186668396,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 387
    },
    {
      "clip_ratio/high_max": 0.0016087754302134272,
      "clip_ratio/high_mean": 0.0006124790961621329,
      "clip_ratio/low_mean": 0.0005558429238590179,
      "clip_ratio/low_min": 2.7581641916185617e-05,
      "clip_ratio/region_mean": 0.0011683220400300343,
      "completions/clipped_ratio": 0.0435267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2574.0,
      "completions/mean_length": 723.3928833007812,
      "completions/mean_terminated_length": 569.9136352539062,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 3.6250728862973762,
      "grad_norm": 0.1619538515806198,
      "learning_rate": 1e-06,
      "loss": -0.0034,
      "num_tokens": 225964886.0,
      "reward": 0.566964328289032,
      "reward_std": 0.18163184821605682,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 388
    },
    {
      "clip_ratio/high_max": 0.0016635123356536496,
      "clip_ratio/high_mean": 0.0006454615149777965,
      "clip_ratio/low_mean": 0.0005227474130151677,
      "clip_ratio/low_min": 1.8667860786081292e-05,
      "clip_ratio/region_mean": 0.0011682089389069006,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4074.0,
      "completions/mean_length": 595.5178833007812,
      "completions/mean_terminated_length": 507.4049987792969,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 3.6344023323615158,
      "grad_norm": 0.1991020143032074,
      "learning_rate": 1e-06,
      "loss": 0.0087,
      "num_tokens": 226497678.0,
      "reward": 0.652901828289032,
      "reward_std": 0.16085928678512573,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631320357322693,
      "step": 389
    },
    {
      "clip_ratio/high_max": 0.002146697115676943,
      "clip_ratio/high_mean": 0.0007646301892236806,
      "clip_ratio/low_mean": 0.0006863439302833285,
      "clip_ratio/low_min": 5.6588668485346716e-05,
      "clip_ratio/region_mean": 0.001450974123144988,
      "completions/clipped_ratio": 0.046875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2704.0,
      "completions/mean_length": 743.0045166015625,
      "completions/mean_terminated_length": 578.10302734375,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 3.643731778425656,
      "grad_norm": 0.19823414087295532,
      "learning_rate": 1e-06,
      "loss": -0.0203,
      "num_tokens": 227071610.0,
      "reward": 0.559151828289032,
      "reward_std": 0.1804393231868744,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 390
    },
    {
      "clip_ratio/high_max": 0.001862110133515671,
      "clip_ratio/high_mean": 0.0006820079397584777,
      "clip_ratio/low_mean": 0.0004486233756324509,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011306313062959816,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3934.0,
      "completions/mean_length": 705.8560791015625,
      "completions/mean_terminated_length": 568.0452880859375,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 3.6530612244897958,
      "grad_norm": 0.17820972204208374,
      "learning_rate": 1e-06,
      "loss": -0.0092,
      "num_tokens": 227647241.0,
      "reward": 0.652901828289032,
      "reward_std": 0.1803637593984604,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631320357322693,
      "step": 391
    },
    {
      "clip_ratio/high_max": 0.001610847579286201,
      "clip_ratio/high_mean": 0.0005727976622438291,
      "clip_ratio/low_mean": 0.00037567990921161254,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009484775582677685,
      "completions/clipped_ratio": 0.052455357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3027.0,
      "completions/mean_length": 757.7767944335938,
      "completions/mean_terminated_length": 572.9752807617188,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 3.6623906705539357,
      "grad_norm": 0.15047359466552734,
      "learning_rate": 1e-06,
      "loss": -0.013,
      "num_tokens": 228232681.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.12238264083862305,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 392
    },
    {
      "clip_ratio/high_max": 0.0017563890069141053,
      "clip_ratio/high_mean": 0.0006777052003599238,
      "clip_ratio/low_mean": 0.00045894521463196725,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011366504077159334,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3657.0,
      "completions/mean_length": 663.0535888671875,
      "completions/mean_terminated_length": 540.0231323242188,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 3.6717201166180757,
      "grad_norm": 0.17460283637046814,
      "learning_rate": 1e-06,
      "loss": -0.0155,
      "num_tokens": 228794985.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.16337549686431885,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 393
    },
    {
      "clip_ratio/high_max": 0.0020423363166628405,
      "clip_ratio/high_mean": 0.0008425131418334786,
      "clip_ratio/low_mean": 0.0005316771175785107,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013741902839683462,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3771.0,
      "completions/mean_length": 709.935302734375,
      "completions/mean_terminated_length": 572.2903442382812,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 3.6810495626822157,
      "grad_norm": 0.20186789333820343,
      "learning_rate": 1e-06,
      "loss": -0.0062,
      "num_tokens": 229380255.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.17036226391792297,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 394
    },
    {
      "clip_ratio/high_max": 0.0013513179546862375,
      "clip_ratio/high_mean": 0.0005541715963772731,
      "clip_ratio/low_mean": 0.0005126830387780501,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001066854631062597,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4068.0,
      "completions/mean_length": 734.099365234375,
      "completions/mean_terminated_length": 589.291015625,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 3.6903790087463557,
      "grad_norm": 0.17814481258392334,
      "learning_rate": 1e-06,
      "loss": -0.0059,
      "num_tokens": 229970680.0,
      "reward": 0.5703125,
      "reward_std": 0.16191193461418152,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 395
    },
    {
      "clip_ratio/high_max": 0.0018220946731162257,
      "clip_ratio/high_mean": 0.0007106859047780745,
      "clip_ratio/low_mean": 0.0006099286265452974,
      "clip_ratio/low_min": 3.661535811261274e-05,
      "clip_ratio/region_mean": 0.0013206145376898348,
      "completions/clipped_ratio": 0.046875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3554.0,
      "completions/mean_length": 713.458740234375,
      "completions/mean_terminated_length": 547.1041870117188,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 3.6997084548104957,
      "grad_norm": 0.2087281048297882,
      "learning_rate": 1e-06,
      "loss": -0.0146,
      "num_tokens": 230526851.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.1885194182395935,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 396
    },
    {
      "clip_ratio/high_max": 0.0017975419541471638,
      "clip_ratio/high_mean": 0.0007312477719096933,
      "clip_ratio/low_mean": 0.0007087690391927026,
      "clip_ratio/low_min": 1.3766520169156138e-05,
      "clip_ratio/region_mean": 0.0014400167638086714,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3790.0,
      "completions/mean_length": 660.8460083007812,
      "completions/mean_terminated_length": 521.20556640625,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 3.7090379008746357,
      "grad_norm": 0.25244954228401184,
      "learning_rate": 1e-06,
      "loss": -0.0191,
      "num_tokens": 231061969.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.2110590636730194,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.4846802353858948,
      "step": 397
    },
    {
      "clip_ratio/high_max": 0.0019930917042074725,
      "clip_ratio/high_mean": 0.0007006300147622824,
      "clip_ratio/low_mean": 0.0005934465352765983,
      "clip_ratio/low_min": 1.5621095371898264e-05,
      "clip_ratio/region_mean": 0.0012940765591338277,
      "completions/clipped_ratio": 0.0591517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2275.0,
      "completions/mean_length": 792.5078735351562,
      "completions/mean_terminated_length": 584.81494140625,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 3.7183673469387752,
      "grad_norm": 0.1891532838344574,
      "learning_rate": 1e-06,
      "loss": -0.0167,
      "num_tokens": 231645584.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.17934459447860718,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 398
    },
    {
      "clip_ratio/high_max": 0.0015383086665679002,
      "clip_ratio/high_mean": 0.0005625836811304907,
      "clip_ratio/low_mean": 0.0005180119342185208,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010805956153490115,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1958.0,
      "completions/mean_length": 624.609375,
      "completions/mean_terminated_length": 512.6290283203125,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 3.7276967930029157,
      "grad_norm": 0.3193029463291168,
      "learning_rate": 1e-06,
      "loss": -0.002,
      "num_tokens": 232170738.0,
      "reward": 0.684151828289032,
      "reward_std": 0.1433078944683075,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 399
    },
    {
      "clip_ratio/high_max": 0.001933300263772253,
      "clip_ratio/high_mean": 0.0007834839052520692,
      "clip_ratio/low_mean": 0.0006117325256127515,
      "clip_ratio/low_min": 1.5184645235422067e-05,
      "clip_ratio/region_mean": 0.0013952164445072412,
      "completions/clipped_ratio": 0.0401785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3498.0,
      "completions/mean_length": 702.0703735351562,
      "completions/mean_terminated_length": 559.9988403320312,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 3.7370262390670552,
      "grad_norm": 0.19785086810588837,
      "learning_rate": 1e-06,
      "loss": -0.0234,
      "num_tokens": 232732321.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.1814924031496048,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 400
    },
    {
      "clip_ratio/high_max": 0.0018581328040454537,
      "clip_ratio/high_mean": 0.0007588266589664272,
      "clip_ratio/low_mean": 0.00046630068300146377,
      "clip_ratio/low_min": 1.549330772832036e-05,
      "clip_ratio/region_mean": 0.001225127351062838,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3813.0,
      "completions/mean_length": 707.1842041015625,
      "completions/mean_terminated_length": 561.216552734375,
      "completions/min_length": 177.0,
      "completions/min_terminated_length": 177.0,
      "epoch": 3.746355685131195,
      "grad_norm": 0.18901850283145905,
      "learning_rate": 1e-06,
      "loss": 0.0142,
      "num_tokens": 233296070.0,
      "reward": 0.668526828289032,
      "reward_std": 0.1643964797258377,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056483745575,
      "step": 401
    },
    {
      "clip_ratio/high_max": 0.0014970728334446903,
      "clip_ratio/high_mean": 0.0005708990856874152,
      "clip_ratio/low_mean": 0.0003868023632094264,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009577014643582515,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2382.0,
      "completions/mean_length": 673.4609375,
      "completions/mean_terminated_length": 575.2250366210938,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 3.755685131195335,
      "grad_norm": 0.1590486615896225,
      "learning_rate": 1e-06,
      "loss": -0.0047,
      "num_tokens": 233882291.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.13151763379573822,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 402
    },
    {
      "clip_ratio/high_max": 0.001991315984923858,
      "clip_ratio/high_mean": 0.0007662270454602549,
      "clip_ratio/low_mean": 0.0005122620159454527,
      "clip_ratio/low_min": 1.4236901733966079e-05,
      "clip_ratio/region_mean": 0.001278489078686107,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3048.0,
      "completions/mean_length": 701.0078735351562,
      "completions/mean_terminated_length": 563.0,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 3.765014577259475,
      "grad_norm": 0.28080618381500244,
      "learning_rate": 1e-06,
      "loss": -0.0216,
      "num_tokens": 234447778.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.1858833283185959,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 403
    },
    {
      "clip_ratio/high_max": 0.0014330903650261462,
      "clip_ratio/high_mean": 0.0005325768388502183,
      "clip_ratio/low_mean": 0.00043402173287176993,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009665985562605783,
      "completions/clipped_ratio": 0.0636160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3895.0,
      "completions/mean_length": 805.9397583007812,
      "completions/mean_terminated_length": 582.4195556640625,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 3.774344023323615,
      "grad_norm": 0.17053110897541046,
      "learning_rate": 1e-06,
      "loss": -0.0198,
      "num_tokens": 235021012.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.12610390782356262,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 404
    },
    {
      "clip_ratio/high_max": 0.0013069679553154856,
      "clip_ratio/high_mean": 0.0005528732399397995,
      "clip_ratio/low_mean": 0.0005487712951435242,
      "clip_ratio/low_min": 2.5991384973167442e-05,
      "clip_ratio/region_mean": 0.0011016445241693873,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3910.0,
      "completions/mean_length": 703.6641235351562,
      "completions/mean_terminated_length": 586.1466674804688,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 3.783673469387755,
      "grad_norm": 0.17060096561908722,
      "learning_rate": 1e-06,
      "loss": -0.0069,
      "num_tokens": 235629927.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.16296502947807312,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 405
    },
    {
      "clip_ratio/high_max": 0.0020681845235230867,
      "clip_ratio/high_mean": 0.0006885394050186733,
      "clip_ratio/low_mean": 0.00044127773708169116,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011298171448288485,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3542.0,
      "completions/mean_length": 738.8973388671875,
      "completions/mean_terminated_length": 540.4869995117188,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 3.793002915451895,
      "grad_norm": 0.1886061578989029,
      "learning_rate": 1e-06,
      "loss": -0.0212,
      "num_tokens": 236167043.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.15930767357349396,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263837933540344,
      "step": 406
    },
    {
      "clip_ratio/high_max": 0.001883883582195267,
      "clip_ratio/high_mean": 0.000595442439589533,
      "clip_ratio/low_mean": 0.000400367771362653,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009958102054952178,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3756.0,
      "completions/mean_length": 708.7444458007812,
      "completions/mean_terminated_length": 579.2201538085938,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 3.8023323615160347,
      "grad_norm": 0.1705055981874466,
      "learning_rate": 1e-06,
      "loss": -0.0111,
      "num_tokens": 236748742.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.15382862091064453,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 407
    },
    {
      "clip_ratio/high_max": 0.0020897837785014417,
      "clip_ratio/high_mean": 0.0007634654139110353,
      "clip_ratio/low_mean": 0.0004084199117642129,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011718853420461528,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2886.0,
      "completions/mean_length": 670.654052734375,
      "completions/mean_terminated_length": 523.1129150390625,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 3.811661807580175,
      "grad_norm": 0.18541960418224335,
      "learning_rate": 1e-06,
      "loss": -0.0302,
      "num_tokens": 237277168.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.138014018535614,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 408
    },
    {
      "clip_ratio/high_max": 0.001379933954012813,
      "clip_ratio/high_mean": 0.0005394819563662168,
      "clip_ratio/low_mean": 0.0004734645453936537,
      "clip_ratio/low_min": 1.4165911125019193e-05,
      "clip_ratio/region_mean": 0.0010129465135833016,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3088.0,
      "completions/mean_length": 872.3069458007812,
      "completions/mean_terminated_length": 615.965087890625,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 3.8209912536443147,
      "grad_norm": 0.16116270422935486,
      "learning_rate": 1e-06,
      "loss": -0.0308,
      "num_tokens": 237874883.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.1557818502187729,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 409
    },
    {
      "clip_ratio/high_max": 0.001288067122004577,
      "clip_ratio/high_mean": 0.0004187676495348569,
      "clip_ratio/low_mean": 0.00037481185336218914,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000793579518358456,
      "completions/clipped_ratio": 0.052455357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3016.0,
      "completions/mean_length": 756.2656860351562,
      "completions/mean_terminated_length": 571.3804931640625,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 3.8303206997084547,
      "grad_norm": 0.12468872219324112,
      "learning_rate": 1e-06,
      "loss": -0.0131,
      "num_tokens": 238451505.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.10521144419908524,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317117214203,
      "step": 410
    },
    {
      "clip_ratio/high_max": 0.0015861556566960644,
      "clip_ratio/high_mean": 0.0005598666220976156,
      "clip_ratio/low_mean": 0.0004007624011137523,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000960629044129746,
      "completions/clipped_ratio": 0.060267857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2859.0,
      "completions/mean_length": 749.4386596679688,
      "completions/mean_terminated_length": 534.8135375976562,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 3.8396501457725947,
      "grad_norm": 0.17238463461399078,
      "learning_rate": 1e-06,
      "loss": -0.0163,
      "num_tokens": 238986634.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.1530490666627884,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.47942501306533813,
      "step": 411
    },
    {
      "clip_ratio/high_max": 0.0016027354431571439,
      "clip_ratio/high_mean": 0.0006036079685145523,
      "clip_ratio/low_mean": 0.000477300554393878,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010809085179062095,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4034.0,
      "completions/mean_length": 752.7120971679688,
      "completions/mean_terminated_length": 555.1182250976562,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 3.8489795918367347,
      "grad_norm": 0.17312484979629517,
      "learning_rate": 1e-06,
      "loss": -0.0068,
      "num_tokens": 239540840.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.1429741084575653,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.4884119927883148,
      "step": 412
    },
    {
      "clip_ratio/high_max": 0.0014972918725106865,
      "clip_ratio/high_mean": 0.0005365095371416828,
      "clip_ratio/low_mean": 0.00033137091645585315,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000867880463374604,
      "completions/clipped_ratio": 0.056919642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3277.0,
      "completions/mean_length": 702.779052734375,
      "completions/mean_terminated_length": 497.9810485839844,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 3.8583090379008746,
      "grad_norm": 0.16142456233501434,
      "learning_rate": 1e-06,
      "loss": -0.0118,
      "num_tokens": 240040914.0,
      "reward": 0.707589328289032,
      "reward_std": 0.1360180377960205,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 413
    },
    {
      "clip_ratio/high_max": 0.0019075731761404313,
      "clip_ratio/high_mean": 0.0007231353165479959,
      "clip_ratio/low_mean": 0.0005526428140001372,
      "clip_ratio/low_min": 1.667555989115499e-05,
      "clip_ratio/region_mean": 0.0012757781296386383,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3487.0,
      "completions/mean_length": 689.9252319335938,
      "completions/mean_terminated_length": 563.7742919921875,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 3.8676384839650146,
      "grad_norm": 0.21427814662456512,
      "learning_rate": 1e-06,
      "loss": -0.0113,
      "num_tokens": 240623687.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.17104512453079224,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 414
    },
    {
      "clip_ratio/high_max": 0.001695656195806805,
      "clip_ratio/high_mean": 0.0006169801290525356,
      "clip_ratio/low_mean": 0.0005569700933847344,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001173950240627164,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2344.0,
      "completions/mean_length": 608.1953125,
      "completions/mean_terminated_length": 508.0860900878906,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 3.8769679300291546,
      "grad_norm": 0.18354754149913788,
      "learning_rate": 1e-06,
      "loss": 0.0016,
      "num_tokens": 241150190.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.1443602740764618,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 415
    },
    {
      "clip_ratio/high_max": 0.002135642702342011,
      "clip_ratio/high_mean": 0.0007123456252884353,
      "clip_ratio/low_mean": 0.0005532885870707105,
      "clip_ratio/low_min": 1.5617191820638254e-05,
      "clip_ratio/region_mean": 0.001265634193259757,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4077.0,
      "completions/mean_length": 834.4263916015625,
      "completions/mean_terminated_length": 591.959228515625,
      "completions/min_length": 185.0,
      "completions/min_terminated_length": 185.0,
      "epoch": 3.8862973760932946,
      "grad_norm": 0.18390852212905884,
      "learning_rate": 1e-06,
      "loss": -0.0231,
      "num_tokens": 241739988.0,
      "reward": 0.5546875,
      "reward_std": 0.17618604004383087,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 416
    },
    {
      "clip_ratio/high_max": 0.0016206954096560366,
      "clip_ratio/high_mean": 0.0007030477354419418,
      "clip_ratio/low_mean": 0.000554286551050609,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012573342792165931,
      "completions/clipped_ratio": 0.0535714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3834.0,
      "completions/mean_length": 764.5960083007812,
      "completions/mean_terminated_length": 576.0259399414062,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 3.8956268221574346,
      "grad_norm": 0.2396124005317688,
      "learning_rate": 1e-06,
      "loss": -0.0254,
      "num_tokens": 242321666.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.17615465819835663,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 417
    },
    {
      "clip_ratio/high_max": 0.001999508596782107,
      "clip_ratio/high_mean": 0.0007815576373104705,
      "clip_ratio/low_mean": 0.0003721101156770601,
      "clip_ratio/low_min": 1.0111632036569063e-05,
      "clip_ratio/region_mean": 0.0011536677702679299,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3993.0,
      "completions/mean_length": 677.3549194335938,
      "completions/mean_terminated_length": 550.7384033203125,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 3.904956268221574,
      "grad_norm": 0.1902942657470703,
      "learning_rate": 1e-06,
      "loss": -0.0215,
      "num_tokens": 242889016.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.14943771064281464,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 418
    },
    {
      "clip_ratio/high_max": 0.002159133549866965,
      "clip_ratio/high_mean": 0.000910751932678977,
      "clip_ratio/low_mean": 0.0006056313368389965,
      "clip_ratio/low_min": 1.3787778698315378e-05,
      "clip_ratio/region_mean": 0.0015163832795224153,
      "completions/clipped_ratio": 0.052455357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3032.0,
      "completions/mean_length": 792.075927734375,
      "completions/mean_terminated_length": 609.1731567382812,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 3.914285714285714,
      "grad_norm": 0.2009381502866745,
      "learning_rate": 1e-06,
      "loss": -0.036,
      "num_tokens": 243491412.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.21451324224472046,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 419
    },
    {
      "clip_ratio/high_max": 0.0014295754899649182,
      "clip_ratio/high_mean": 0.0005785217995253333,
      "clip_ratio/low_mean": 0.0005807519701193087,
      "clip_ratio/low_min": 1.7327418390777893e-05,
      "clip_ratio/region_mean": 0.0011592737682804,
      "completions/clipped_ratio": 0.049107142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1833.0,
      "completions/mean_length": 684.1674194335938,
      "completions/mean_terminated_length": 507.969482421875,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 3.923615160349854,
      "grad_norm": 0.19341816008090973,
      "learning_rate": 1e-06,
      "loss": -0.0156,
      "num_tokens": 244012114.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.15634779632091522,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 420
    },
    {
      "clip_ratio/high_max": 0.001651230275456328,
      "clip_ratio/high_mean": 0.0006677539759039064,
      "clip_ratio/low_mean": 0.0006492672764579765,
      "clip_ratio/low_min": 2.4468825358781032e-05,
      "clip_ratio/region_mean": 0.001317021255090367,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4037.0,
      "completions/mean_length": 802.0904541015625,
      "completions/mean_terminated_length": 565.6853637695312,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 3.932944606413994,
      "grad_norm": 0.1774033010005951,
      "learning_rate": 1e-06,
      "loss": -0.0122,
      "num_tokens": 244578291.0,
      "reward": 0.574776828289032,
      "reward_std": 0.17468081414699554,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 421
    },
    {
      "clip_ratio/high_max": 0.001811074347642716,
      "clip_ratio/high_mean": 0.0006715565041304217,
      "clip_ratio/low_mean": 0.0005162078393823322,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011877643264597282,
      "completions/clipped_ratio": 0.060267857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3190.0,
      "completions/mean_length": 793.2701416015625,
      "completions/mean_terminated_length": 581.4560546875,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 3.942274052478134,
      "grad_norm": 0.19825774431228638,
      "learning_rate": 1e-06,
      "loss": -0.0065,
      "num_tokens": 245153885.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.16709676384925842,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 422
    },
    {
      "clip_ratio/high_max": 0.001544920240121428,
      "clip_ratio/high_mean": 0.0006404981068044435,
      "clip_ratio/low_mean": 0.0004122852078580763,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010527833328524139,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3552.0,
      "completions/mean_length": 747.935302734375,
      "completions/mean_terminated_length": 579.1582641601562,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 3.951603498542274,
      "grad_norm": 0.16005107760429382,
      "learning_rate": 1e-06,
      "loss": -0.0115,
      "num_tokens": 245739067.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.13319942355155945,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 423
    },
    {
      "clip_ratio/high_max": 0.001829434411774855,
      "clip_ratio/high_mean": 0.0006740614317095606,
      "clip_ratio/low_mean": 0.00046427523557213135,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011383366436348297,
      "completions/clipped_ratio": 0.0535714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3272.0,
      "completions/mean_length": 704.7767944335938,
      "completions/mean_terminated_length": 512.8207397460938,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 3.960932944606414,
      "grad_norm": 0.1984466165304184,
      "learning_rate": 1e-06,
      "loss": -0.005,
      "num_tokens": 246256451.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.15744110941886902,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 424
    },
    {
      "clip_ratio/high_max": 0.0019211695980629884,
      "clip_ratio/high_mean": 0.0006977131324674701,
      "clip_ratio/low_mean": 0.0005139370568940649,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012116501566197257,
      "completions/clipped_ratio": 0.049107142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3219.0,
      "completions/mean_length": 736.3538208007812,
      "completions/mean_terminated_length": 562.8509521484375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 3.970262390670554,
      "grad_norm": 0.18405428528785706,
      "learning_rate": 1e-06,
      "loss": -0.0206,
      "num_tokens": 246824592.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.17325936257839203,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 425
    },
    {
      "clip_ratio/high_max": 0.002113569153152639,
      "clip_ratio/high_mean": 0.0007154547784011811,
      "clip_ratio/low_mean": 0.0005296306944728713,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012450854774215259,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3477.0,
      "completions/mean_length": 678.3248291015625,
      "completions/mean_terminated_length": 526.959228515625,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 3.979591836734694,
      "grad_norm": 0.19453385472297668,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 247367003.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.1684875339269638,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 426
    },
    {
      "clip_ratio/high_max": 0.0016110108535940526,
      "clip_ratio/high_mean": 0.0006107969102231436,
      "clip_ratio/low_mean": 0.00043135071882716147,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010421476290503051,
      "completions/clipped_ratio": 0.0580357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3254.0,
      "completions/mean_length": 754.3516235351562,
      "completions/mean_terminated_length": 548.468017578125,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 3.9889212827988336,
      "grad_norm": 0.16089829802513123,
      "learning_rate": 1e-06,
      "loss": -0.0195,
      "num_tokens": 247920278.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.13230928778648376,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 427
    },
    {
      "clip_ratio/high_max": 0.0024735098049859516,
      "clip_ratio/high_mean": 0.0009925615377142094,
      "clip_ratio/low_mean": 0.0007235122666315874,
      "clip_ratio/low_min": 3.9696005842415616e-05,
      "clip_ratio/region_mean": 0.0017160737843369134,
      "completions/clipped_ratio": 0.03693181818181823,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3544.0,
      "completions/mean_length": 668.04833984375,
      "completions/mean_terminated_length": 536.5928955078125,
      "completions/min_length": 184.0,
      "completions/min_terminated_length": 184.0,
      "epoch": 3.9982507288629736,
      "grad_norm": 0.21787001192569733,
      "learning_rate": 1e-06,
      "loss": -0.0179,
      "num_tokens": 248463154.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.18806587159633636,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 428
    },
    {
      "clip_ratio/high_max": 0.001550124095956562,
      "clip_ratio/high_mean": 0.0005623620854748879,
      "clip_ratio/low_mean": 0.0005324066460161703,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010947687478619628,
      "completions/clipped_ratio": 0.0580357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3375.0,
      "completions/mean_length": 734.4710083007812,
      "completions/mean_terminated_length": 527.362548828125,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 4.0093294460641395,
      "grad_norm": 0.17575211822986603,
      "learning_rate": 1e-06,
      "loss": -0.0242,
      "num_tokens": 249002344.0,
      "reward": 0.660714328289032,
      "reward_std": 0.1356828510761261,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 429
    },
    {
      "clip_ratio/high_max": 0.0018280293879797682,
      "clip_ratio/high_mean": 0.0006028579900885234,
      "clip_ratio/low_mean": 0.0006333552573778434,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012362132583803032,
      "completions/clipped_ratio": 0.0401785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4068.0,
      "completions/mean_length": 697.0435791015625,
      "completions/mean_terminated_length": 554.7615966796875,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 4.01865889212828,
      "grad_norm": 0.17597052454948425,
      "learning_rate": 1e-06,
      "loss": -0.0081,
      "num_tokens": 249564175.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.1612725704908371,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 430
    },
    {
      "clip_ratio/high_max": 0.0018020729767158628,
      "clip_ratio/high_mean": 0.0007306219995371066,
      "clip_ratio/low_mean": 0.00048518850417167414,
      "clip_ratio/low_min": 3.35315507982159e-05,
      "clip_ratio/region_mean": 0.0012158105237176642,
      "completions/clipped_ratio": 0.0457589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4057.0,
      "completions/mean_length": 736.1830444335938,
      "completions/mean_terminated_length": 575.0689697265625,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 4.0279883381924195,
      "grad_norm": 0.18957768380641937,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 250134251.0,
      "reward": 0.629464328289032,
      "reward_std": 0.16491781175136566,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 431
    },
    {
      "clip_ratio/high_max": 0.0016300738425343297,
      "clip_ratio/high_mean": 0.00061281691705517,
      "clip_ratio/low_mean": 0.0005833730356243905,
      "clip_ratio/low_min": 1.642143979552202e-05,
      "clip_ratio/region_mean": 0.0011961899435846135,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3851.0,
      "completions/mean_length": 731.4297485351562,
      "completions/mean_terminated_length": 598.7203979492188,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 4.03731778425656,
      "grad_norm": 0.1931212693452835,
      "learning_rate": 1e-06,
      "loss": -0.0074,
      "num_tokens": 250736332.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.18986491858959198,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 432
    },
    {
      "clip_ratio/high_max": 0.001458179725887021,
      "clip_ratio/high_mean": 0.0005024361425967072,
      "clip_ratio/low_mean": 0.00032729567737987963,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008297318090626504,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3925.0,
      "completions/mean_length": 832.6920166015625,
      "completions/mean_terminated_length": 606.83056640625,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 4.0466472303206995,
      "grad_norm": 0.1558295339345932,
      "learning_rate": 1e-06,
      "loss": -0.0039,
      "num_tokens": 251329912.0,
      "reward": 0.5859375,
      "reward_std": 0.13185352087020874,
      "rewards/verify_math_reward/mean": 0.5859375,
      "rewards/verify_math_reward/std": 0.4928344786167145,
      "step": 433
    },
    {
      "clip_ratio/high_max": 0.0023373076001007576,
      "clip_ratio/high_mean": 0.0009709448768262519,
      "clip_ratio/low_mean": 0.0007500772353523644,
      "clip_ratio/low_min": 4.23151723225601e-05,
      "clip_ratio/region_mean": 0.0017210221267305315,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4068.0,
      "completions/mean_length": 896.3917846679688,
      "completions/mean_terminated_length": 587.0048828125,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 4.05597667638484,
      "grad_norm": 0.2019355148077011,
      "learning_rate": 1e-06,
      "loss": -0.0426,
      "num_tokens": 251909367.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.2145560085773468,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 434
    },
    {
      "clip_ratio/high_max": 0.0020325570585555397,
      "clip_ratio/high_mean": 0.0007336153103096876,
      "clip_ratio/low_mean": 0.00047519568897769204,
      "clip_ratio/low_min": 2.19606463360833e-05,
      "clip_ratio/region_mean": 0.0012088110015611164,
      "completions/clipped_ratio": 0.0502232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2026.0,
      "completions/mean_length": 680.1361694335938,
      "completions/mean_terminated_length": 499.5087890625,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 4.0653061224489795,
      "grad_norm": 0.1919316202402115,
      "learning_rate": 1e-06,
      "loss": -0.0255,
      "num_tokens": 252418449.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.1665661334991455,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 435
    },
    {
      "clip_ratio/high_max": 0.0017925076244864613,
      "clip_ratio/high_mean": 0.000628407547083043,
      "clip_ratio/low_mean": 0.0003784722875934676,
      "clip_ratio/low_min": 1.4898689187248237e-05,
      "clip_ratio/region_mean": 0.0010068798219435848,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2798.0,
      "completions/mean_length": 791.8348388671875,
      "completions/mean_terminated_length": 546.2014770507812,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 4.07463556851312,
      "grad_norm": 0.1585756242275238,
      "learning_rate": 1e-06,
      "loss": -0.0252,
      "num_tokens": 252964285.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.13455379009246826,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 436
    },
    {
      "clip_ratio/high_max": 0.0019483662836137228,
      "clip_ratio/high_mean": 0.0008094949462247314,
      "clip_ratio/low_mean": 0.0006003248581691878,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00140981977165211,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2295.0,
      "completions/mean_length": 779.6495971679688,
      "completions/mean_terminated_length": 545.8804931640625,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 4.0839650145772595,
      "grad_norm": 0.21476420760154724,
      "learning_rate": 1e-06,
      "loss": -0.0162,
      "num_tokens": 253510843.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.17577417194843292,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 437
    },
    {
      "clip_ratio/high_max": 0.0018718587707553525,
      "clip_ratio/high_mean": 0.0007202738433988998,
      "clip_ratio/low_mean": 0.000541594497008191,
      "clip_ratio/low_min": 2.1724017642554827e-05,
      "clip_ratio/region_mean": 0.0012618683358596172,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3965.0,
      "completions/mean_length": 828.8236694335938,
      "completions/mean_terminated_length": 594.3372802734375,
      "completions/min_length": 186.0,
      "completions/min_terminated_length": 186.0,
      "epoch": 4.093294460641399,
      "grad_norm": 0.19090571999549866,
      "learning_rate": 1e-06,
      "loss": -0.0169,
      "num_tokens": 254088917.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.18577638268470764,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 438
    },
    {
      "clip_ratio/high_max": 0.0016446651679871138,
      "clip_ratio/high_mean": 0.000559275911655277,
      "clip_ratio/low_mean": 0.00048501389574084897,
      "clip_ratio/low_min": 1.07499135992839e-05,
      "clip_ratio/region_mean": 0.001044289798301179,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3640.0,
      "completions/mean_length": 748.8660888671875,
      "completions/mean_terminated_length": 600.6246948242188,
      "completions/min_length": 93.0,
      "completions/min_terminated_length": 93.0,
      "epoch": 4.1026239067055394,
      "grad_norm": 0.17010226845741272,
      "learning_rate": 1e-06,
      "loss": -0.0055,
      "num_tokens": 254694853.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.16078469157218933,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 439
    },
    {
      "clip_ratio/high_max": 0.0017630194415687583,
      "clip_ratio/high_mean": 0.0006545774831465678,
      "clip_ratio/low_mean": 0.0005907071354158688,
      "clip_ratio/low_min": 1.2150077964179218e-05,
      "clip_ratio/region_mean": 0.0012452845985535532,
      "completions/clipped_ratio": 0.0513392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2877.0,
      "completions/mean_length": 786.872802734375,
      "completions/mean_terminated_length": 607.7905883789062,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 4.111953352769679,
      "grad_norm": 0.15964417159557343,
      "learning_rate": 1e-06,
      "loss": -0.0171,
      "num_tokens": 255293563.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.16709743440151215,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 440
    },
    {
      "clip_ratio/high_max": 0.0014676659920951352,
      "clip_ratio/high_mean": 0.0005042846823926084,
      "clip_ratio/low_mean": 0.0004731673179776408,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009774520021892386,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2953.0,
      "completions/mean_length": 705.7645263671875,
      "completions/mean_terminated_length": 576.1262817382812,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 4.121282798833819,
      "grad_norm": 0.21414446830749512,
      "learning_rate": 1e-06,
      "loss": -0.0131,
      "num_tokens": 255874480.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.14199630916118622,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.46642565727233887,
      "step": 441
    },
    {
      "clip_ratio/high_max": 0.002008291019592434,
      "clip_ratio/high_mean": 0.0006626217946177348,
      "clip_ratio/low_mean": 0.0004880354131273634,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011506572445796337,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3138.0,
      "completions/mean_length": 720.8326416015625,
      "completions/mean_terminated_length": 583.6306762695312,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 4.130612244897959,
      "grad_norm": 0.1744566708803177,
      "learning_rate": 1e-06,
      "loss": -0.0206,
      "num_tokens": 256467282.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.15849420428276062,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 442
    },
    {
      "clip_ratio/high_max": 0.0018489293870516121,
      "clip_ratio/high_mean": 0.000563837046684057,
      "clip_ratio/low_mean": 0.00033357295546920795,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008974099946499337,
      "completions/clipped_ratio": 0.046875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2574.0,
      "completions/mean_length": 723.3482666015625,
      "completions/mean_terminated_length": 557.4800415039062,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 4.139941690962099,
      "grad_norm": 0.18050570785999298,
      "learning_rate": 1e-06,
      "loss": -0.0082,
      "num_tokens": 257041730.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.12674400210380554,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 443
    },
    {
      "clip_ratio/high_max": 0.0014826594124315307,
      "clip_ratio/high_mean": 0.0005463933921419084,
      "clip_ratio/low_mean": 0.0005113395827720524,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010577329594525509,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2941.0,
      "completions/mean_length": 640.4096069335938,
      "completions/mean_terminated_length": 528.93896484375,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 4.149271137026239,
      "grad_norm": 0.1837472915649414,
      "learning_rate": 1e-06,
      "loss": -0.0145,
      "num_tokens": 257597953.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.149361714720726,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 444
    },
    {
      "clip_ratio/high_max": 0.0019662171325762756,
      "clip_ratio/high_mean": 0.0007879960066929925,
      "clip_ratio/low_mean": 0.0006120282741903793,
      "clip_ratio/low_min": 2.2731404897058383e-05,
      "clip_ratio/region_mean": 0.0014000243027112447,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3948.0,
      "completions/mean_length": 688.4174194335938,
      "completions/mean_terminated_length": 549.8977661132812,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 4.158600583090379,
      "grad_norm": 0.2023085504770279,
      "learning_rate": 1e-06,
      "loss": 0.0079,
      "num_tokens": 258162423.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.1816311627626419,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 445
    },
    {
      "clip_ratio/high_max": 0.0019811143902188633,
      "clip_ratio/high_mean": 0.0007271489575941814,
      "clip_ratio/low_mean": 0.0004393708222778514,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001166519767139107,
      "completions/clipped_ratio": 0.0435267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4055.0,
      "completions/mean_length": 720.2924194335938,
      "completions/mean_terminated_length": 566.672119140625,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 4.167930029154519,
      "grad_norm": 0.17384913563728333,
      "learning_rate": 1e-06,
      "loss": -0.025,
      "num_tokens": 258734085.0,
      "reward": 0.652901828289032,
      "reward_std": 0.17525240778923035,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631320357322693,
      "step": 446
    },
    {
      "clip_ratio/high_max": 0.001937957677000668,
      "clip_ratio/high_mean": 0.0007817323348717764,
      "clip_ratio/low_mean": 0.0006255138932829141,
      "clip_ratio/low_min": 1.4595982975151855e-05,
      "clip_ratio/region_mean": 0.001407246232702164,
      "completions/clipped_ratio": 0.0591517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3820.0,
      "completions/mean_length": 771.0078735351562,
      "completions/mean_terminated_length": 561.9632568359375,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 4.1772594752186585,
      "grad_norm": 0.24516253173351288,
      "learning_rate": 1e-06,
      "loss": -0.007,
      "num_tokens": 259298228.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.175969198346138,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 447
    },
    {
      "clip_ratio/high_max": 0.0022653146806987934,
      "clip_ratio/high_mean": 0.0007788074835843872,
      "clip_ratio/low_mean": 0.0004663111021727673,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012451185648387764,
      "completions/clipped_ratio": 0.0457589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3996.0,
      "completions/mean_length": 714.8750610351562,
      "completions/mean_terminated_length": 552.7391967773438,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 4.186588921282799,
      "grad_norm": 0.16899371147155762,
      "learning_rate": 1e-06,
      "loss": -0.0336,
      "num_tokens": 259866924.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.15210728347301483,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 448
    },
    {
      "clip_ratio/high_max": 0.0015653916780138388,
      "clip_ratio/high_mean": 0.0006049003204680048,
      "clip_ratio/low_mean": 0.0005498287810041802,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001154729110567132,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3565.0,
      "completions/mean_length": 794.388427734375,
      "completions/mean_terminated_length": 574.2809448242188,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 4.1959183673469385,
      "grad_norm": 0.17593829333782196,
      "learning_rate": 1e-06,
      "loss": -0.0169,
      "num_tokens": 260438440.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.16360443830490112,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 449
    },
    {
      "clip_ratio/high_max": 0.0016926053067436442,
      "clip_ratio/high_mean": 0.000615798800936318,
      "clip_ratio/low_mean": 0.0006116657750681043,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012274645596335176,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3470.0,
      "completions/mean_length": 694.6674194335938,
      "completions/mean_terminated_length": 560.5081176757812,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 4.205247813411079,
      "grad_norm": 0.2083657830953598,
      "learning_rate": 1e-06,
      "loss": -0.0093,
      "num_tokens": 261009934.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.17021070420742035,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 450
    },
    {
      "clip_ratio/high_max": 0.0014267964179452974,
      "clip_ratio/high_mean": 0.0005876838677068008,
      "clip_ratio/low_mean": 0.000573326185985934,
      "clip_ratio/low_min": 1.8355360225541517e-05,
      "clip_ratio/region_mean": 0.0011610100373218302,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3799.0,
      "completions/mean_length": 685.5781860351562,
      "completions/mean_terminated_length": 575.5645141601562,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 4.214577259475218,
      "grad_norm": 0.17914150655269623,
      "learning_rate": 1e-06,
      "loss": -0.0089,
      "num_tokens": 261601044.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.17472361028194427,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 451
    },
    {
      "clip_ratio/high_max": 0.0016414470119343605,
      "clip_ratio/high_mean": 0.0005945230295765214,
      "clip_ratio/low_mean": 0.00056577136820124,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011602943850448355,
      "completions/clipped_ratio": 0.0725446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3350.0,
      "completions/mean_length": 781.6707763671875,
      "completions/mean_terminated_length": 522.4271850585938,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 4.223906705539359,
      "grad_norm": 0.17691665887832642,
      "learning_rate": 1e-06,
      "loss": -0.0425,
      "num_tokens": 262122149.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.16398167610168457,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 452
    },
    {
      "clip_ratio/high_max": 0.002144000154657988,
      "clip_ratio/high_mean": 0.0008388274454773637,
      "clip_ratio/low_mean": 0.0005876320974493865,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001426459519279888,
      "completions/clipped_ratio": 0.0580357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3902.0,
      "completions/mean_length": 814.6842041015625,
      "completions/mean_terminated_length": 612.517822265625,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 4.233236151603498,
      "grad_norm": 0.200413316488266,
      "learning_rate": 1e-06,
      "loss": -0.0175,
      "num_tokens": 262732994.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.18303264677524567,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.48841196298599243,
      "step": 453
    },
    {
      "clip_ratio/high_max": 0.0018000014970311895,
      "clip_ratio/high_mean": 0.0007076233650877839,
      "clip_ratio/low_mean": 0.0004935935685352888,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012012169536319561,
      "completions/clipped_ratio": 0.052455357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3184.0,
      "completions/mean_length": 737.0692138671875,
      "completions/mean_terminated_length": 551.121337890625,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 4.242565597667639,
      "grad_norm": 0.19994544982910156,
      "learning_rate": 1e-06,
      "loss": -0.025,
      "num_tokens": 263297664.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.1640922725200653,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 454
    },
    {
      "clip_ratio/high_max": 0.0018318804432055913,
      "clip_ratio/high_mean": 0.0005455324644572102,
      "clip_ratio/low_mean": 0.0004073308882652782,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009528633672744036,
      "completions/clipped_ratio": 0.0435267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4007.0,
      "completions/mean_length": 713.9207763671875,
      "completions/mean_terminated_length": 560.010498046875,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 4.251895043731778,
      "grad_norm": 0.1690525859594345,
      "learning_rate": 1e-06,
      "loss": -0.0173,
      "num_tokens": 263862577.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.1351943016052246,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 455
    },
    {
      "clip_ratio/high_max": 0.0020246798230800778,
      "clip_ratio/high_mean": 0.0007691207392781507,
      "clip_ratio/low_mean": 0.0006225039333003224,
      "clip_ratio/low_min": 3.5597511669038795e-05,
      "clip_ratio/region_mean": 0.001391624642565148,
      "completions/clipped_ratio": 0.0513392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4034.0,
      "completions/mean_length": 730.9766235351562,
      "completions/mean_terminated_length": 548.869384765625,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 4.261224489795918,
      "grad_norm": 0.1988225132226944,
      "learning_rate": 1e-06,
      "loss": -0.0084,
      "num_tokens": 264413948.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.1742018610239029,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 456
    },
    {
      "clip_ratio/high_max": 0.001460505171053228,
      "clip_ratio/high_mean": 0.0005831749031131039,
      "clip_ratio/low_mean": 0.00040539355359214824,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009885684430628316,
      "completions/clipped_ratio": 0.0725446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3958.0,
      "completions/mean_length": 821.3616333007812,
      "completions/mean_terminated_length": 565.22265625,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 4.270553935860058,
      "grad_norm": 0.16104991734027863,
      "learning_rate": 1e-06,
      "loss": -0.0332,
      "num_tokens": 264969280.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.13429415225982666,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 457
    },
    {
      "clip_ratio/high_max": 0.0023104666615836322,
      "clip_ratio/high_mean": 0.0006289108641794883,
      "clip_ratio/low_mean": 0.0005199798324611038,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011488906711747404,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2340.0,
      "completions/mean_length": 824.4319458007812,
      "completions/mean_terminated_length": 581.2218627929688,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 4.279883381924198,
      "grad_norm": 0.18824583292007446,
      "learning_rate": 1e-06,
      "loss": -0.0044,
      "num_tokens": 265539691.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.1331673413515091,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 458
    },
    {
      "clip_ratio/high_max": 0.002066777720756363,
      "clip_ratio/high_mean": 0.0007543249539594399,
      "clip_ratio/low_mean": 0.0003897678893736156,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011440928574302234,
      "completions/clipped_ratio": 0.0457589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3074.0,
      "completions/mean_length": 735.521240234375,
      "completions/mean_terminated_length": 574.3754272460938,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 4.289212827988338,
      "grad_norm": 0.18381187319755554,
      "learning_rate": 1e-06,
      "loss": -0.0003,
      "num_tokens": 266119558.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.1475597620010376,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 459
    },
    {
      "clip_ratio/high_max": 0.001597754508111393,
      "clip_ratio/high_mean": 0.0006955106946406886,
      "clip_ratio/low_mean": 0.000667080661514774,
      "clip_ratio/low_min": 1.5632816939614713e-05,
      "clip_ratio/region_mean": 0.0013625913888972718,
      "completions/clipped_ratio": 0.056919642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3586.0,
      "completions/mean_length": 809.3739013671875,
      "completions/mean_terminated_length": 611.0094604492188,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 4.298542274052478,
      "grad_norm": 0.173160120844841,
      "learning_rate": 1e-06,
      "loss": -0.0179,
      "num_tokens": 266736085.0,
      "reward": 0.578125,
      "reward_std": 0.1738220751285553,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 460
    },
    {
      "clip_ratio/high_max": 0.0019878382809110917,
      "clip_ratio/high_mean": 0.0007721334459347418,
      "clip_ratio/low_mean": 0.0005932776821282459,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013654111462528817,
      "completions/clipped_ratio": 0.0546875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3248.0,
      "completions/mean_length": 778.5670166015625,
      "completions/mean_terminated_length": 586.6493530273438,
      "completions/min_length": 177.0,
      "completions/min_terminated_length": 177.0,
      "epoch": 4.307871720116618,
      "grad_norm": 0.18714334070682526,
      "learning_rate": 1e-06,
      "loss": -0.0143,
      "num_tokens": 267324329.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.16450455784797668,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 461
    },
    {
      "clip_ratio/high_max": 0.002032181990216486,
      "clip_ratio/high_mean": 0.0007165919905673945,
      "clip_ratio/low_mean": 0.0004605987501236086,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001177190752059687,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2023.0,
      "completions/mean_length": 649.9788208007812,
      "completions/mean_terminated_length": 514.0568237304688,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 4.317201166180758,
      "grad_norm": 0.19159138202667236,
      "learning_rate": 1e-06,
      "loss": -0.0209,
      "num_tokens": 267862102.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.14188572764396667,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900800228119,
      "step": 462
    },
    {
      "clip_ratio/high_max": 0.0017651188863965217,
      "clip_ratio/high_mean": 0.0007189238440332701,
      "clip_ratio/low_mean": 0.0004800896194865345,
      "clip_ratio/low_min": 1.402603265887592e-05,
      "clip_ratio/region_mean": 0.0011990134680672782,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3392.0,
      "completions/mean_length": 825.5535888671875,
      "completions/mean_terminated_length": 544.0969848632812,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 4.326530612244898,
      "grad_norm": 0.1885886788368225,
      "learning_rate": 1e-06,
      "loss": -0.0408,
      "num_tokens": 268401510.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.154136061668396,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 463
    },
    {
      "clip_ratio/high_max": 0.0016796603413240518,
      "clip_ratio/high_mean": 0.000691595454554772,
      "clip_ratio/low_mean": 0.0004600154034051229,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011516108570504002,
      "completions/clipped_ratio": 0.0323660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3545.0,
      "completions/mean_length": 698.9308471679688,
      "completions/mean_terminated_length": 585.3033447265625,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 4.335860058309038,
      "grad_norm": 0.19632436335086823,
      "learning_rate": 1e-06,
      "loss": 0.008,
      "num_tokens": 268995784.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.14196310937404633,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219160199165344,
      "step": 464
    },
    {
      "clip_ratio/high_max": 0.0016694177611498162,
      "clip_ratio/high_mean": 0.0006217189511517063,
      "clip_ratio/low_mean": 0.0004182116781521472,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010399306192994118,
      "completions/clipped_ratio": 0.0401785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3824.0,
      "completions/mean_length": 669.4163208007812,
      "completions/mean_terminated_length": 525.9779052734375,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 4.345189504373177,
      "grad_norm": 0.19902345538139343,
      "learning_rate": 1e-06,
      "loss": -0.005,
      "num_tokens": 269535069.0,
      "reward": 0.684151828289032,
      "reward_std": 0.12572552263736725,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 465
    },
    {
      "clip_ratio/high_max": 0.0015598318714182824,
      "clip_ratio/high_mean": 0.0006147263784441748,
      "clip_ratio/low_mean": 0.00025568385171936825,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008704102292540483,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3598.0,
      "completions/mean_length": 674.9285888671875,
      "completions/mean_terminated_length": 539.99072265625,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 4.354518950437318,
      "grad_norm": 0.15384966135025024,
      "learning_rate": 1e-06,
      "loss": -0.0253,
      "num_tokens": 270088189.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.133991077542305,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613667368888855,
      "step": 466
    },
    {
      "clip_ratio/high_max": 0.0015461368420801591,
      "clip_ratio/high_mean": 0.0005656045050272951,
      "clip_ratio/low_mean": 0.0004643862666853238,
      "clip_ratio/low_min": 1.5078408068802673e-05,
      "clip_ratio/region_mean": 0.0010299907698936295,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3968.0,
      "completions/mean_length": 684.8538208007812,
      "completions/mean_terminated_length": 525.4544067382812,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 4.363848396501457,
      "grad_norm": 0.2761067748069763,
      "learning_rate": 1e-06,
      "loss": -0.0166,
      "num_tokens": 270625442.0,
      "reward": 0.640625,
      "reward_std": 0.1557832509279251,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 467
    },
    {
      "clip_ratio/high_max": 0.002094090494210832,
      "clip_ratio/high_mean": 0.000778362604251015,
      "clip_ratio/low_mean": 0.000514425164055865,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012927877723996062,
      "completions/clipped_ratio": 0.0535714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3219.0,
      "completions/mean_length": 730.0469360351562,
      "completions/mean_terminated_length": 539.521240234375,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 4.373177842565598,
      "grad_norm": 0.5231519937515259,
      "learning_rate": 1e-06,
      "loss": -0.0252,
      "num_tokens": 271168580.0,
      "reward": 0.668526828289032,
      "reward_std": 0.1542862057685852,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056483745575,
      "step": 468
    },
    {
      "clip_ratio/high_max": 0.0017877782665891573,
      "clip_ratio/high_mean": 0.0006603659430766129,
      "clip_ratio/low_mean": 0.00032976200827761204,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009901279627229087,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4091.0,
      "completions/mean_length": 828.5167846679688,
      "completions/mean_terminated_length": 560.1726684570312,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 4.382507288629737,
      "grad_norm": 0.22118157148361206,
      "learning_rate": 1e-06,
      "loss": -0.0206,
      "num_tokens": 271722323.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.14507634937763214,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 469
    },
    {
      "clip_ratio/high_max": 0.0016418554696429055,
      "clip_ratio/high_mean": 0.0005991567741148174,
      "clip_ratio/low_mean": 0.000684322667439119,
      "clip_ratio/low_min": 1.2042389244015794e-05,
      "clip_ratio/region_mean": 0.0012834794360969681,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3638.0,
      "completions/mean_length": 735.4408569335938,
      "completions/mean_terminated_length": 586.6049194335938,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 4.391836734693878,
      "grad_norm": 0.17829455435276031,
      "learning_rate": 1e-06,
      "loss": -0.0188,
      "num_tokens": 272321646.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.16717232763767242,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 470
    },
    {
      "clip_ratio/high_max": 0.0017424862526240759,
      "clip_ratio/high_mean": 0.0005964314627817657,
      "clip_ratio/low_mean": 0.00040148980042431504,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00099792124274245,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3968.0,
      "completions/mean_length": 801.927490234375,
      "completions/mean_terminated_length": 569.728759765625,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 4.401166180758017,
      "grad_norm": 0.17486721277236938,
      "learning_rate": 1e-06,
      "loss": -0.0344,
      "num_tokens": 272896141.0,
      "reward": 0.621651828289032,
      "reward_std": 0.14643321931362152,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 471
    },
    {
      "clip_ratio/high_max": 0.0018719569452514406,
      "clip_ratio/high_mean": 0.0006134354989626445,
      "clip_ratio/low_mean": 0.0005102566428831778,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011236921382078435,
      "completions/clipped_ratio": 0.056919642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3691.0,
      "completions/mean_length": 727.091552734375,
      "completions/mean_terminated_length": 523.7609252929688,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 4.410495626822158,
      "grad_norm": 0.1855579912662506,
      "learning_rate": 1e-06,
      "loss": -0.0144,
      "num_tokens": 273430167.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.15180349349975586,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 472
    },
    {
      "clip_ratio/high_max": 0.0017102072451962158,
      "clip_ratio/high_mean": 0.0006409972775145434,
      "clip_ratio/low_mean": 0.0006280849593167659,
      "clip_ratio/low_min": 2.213956759078428e-05,
      "clip_ratio/region_mean": 0.0012690822113654576,
      "completions/clipped_ratio": 0.060267857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4070.0,
      "completions/mean_length": 746.2410888671875,
      "completions/mean_terminated_length": 531.4109497070312,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 4.419825072886297,
      "grad_norm": 0.20268253982067108,
      "learning_rate": 1e-06,
      "loss": 0.0039,
      "num_tokens": 273969439.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.16953809559345245,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 473
    },
    {
      "clip_ratio/high_max": 0.0014539443545800168,
      "clip_ratio/high_mean": 0.0006013629317749292,
      "clip_ratio/low_mean": 0.00045351814969762927,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001054881104209926,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3914.0,
      "completions/mean_length": 706.0870971679688,
      "completions/mean_terminated_length": 576.461181640625,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 4.429154518950437,
      "grad_norm": 0.1866583377122879,
      "learning_rate": 1e-06,
      "loss": -0.0182,
      "num_tokens": 274557893.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.14759299159049988,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.48468026518821716,
      "step": 474
    },
    {
      "clip_ratio/high_max": 0.0011870732232637238,
      "clip_ratio/high_mean": 0.0004201244241812674,
      "clip_ratio/low_mean": 0.0006123480252426816,
      "clip_ratio/low_min": 1.4789398846914992e-05,
      "clip_ratio/region_mean": 0.0010324724316888023,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3373.0,
      "completions/mean_length": 837.8471069335938,
      "completions/mean_terminated_length": 578.7650756835938,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 4.438483965014577,
      "grad_norm": 0.16509538888931274,
      "learning_rate": 1e-06,
      "loss": -0.0111,
      "num_tokens": 275129516.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.1417027711868286,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 475
    },
    {
      "clip_ratio/high_max": 0.0017929547393578105,
      "clip_ratio/high_mean": 0.0006511890314868651,
      "clip_ratio/low_mean": 0.00043041574213020795,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010816047542903107,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1803.0,
      "completions/mean_length": 692.7120971679688,
      "completions/mean_terminated_length": 521.1512451171875,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 4.447813411078717,
      "grad_norm": 0.23841670155525208,
      "learning_rate": 1e-06,
      "loss": -0.0147,
      "num_tokens": 275663066.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.13339374959468842,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 476
    },
    {
      "clip_ratio/high_max": 0.0022910384795977734,
      "clip_ratio/high_mean": 0.0008949081111495616,
      "clip_ratio/low_mean": 0.0005468743415804056,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014417824349948205,
      "completions/clipped_ratio": 0.046875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3545.0,
      "completions/mean_length": 700.8560791015625,
      "completions/mean_terminated_length": 533.8817138671875,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 4.457142857142857,
      "grad_norm": 0.21968919038772583,
      "learning_rate": 1e-06,
      "loss": -0.0183,
      "num_tokens": 276204841.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.17536300420761108,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 477
    },
    {
      "clip_ratio/high_max": 0.001537029049359262,
      "clip_ratio/high_mean": 0.0005785736011603149,
      "clip_ratio/low_mean": 0.0005203723349040956,
      "clip_ratio/low_min": 2.3229882572195493e-05,
      "clip_ratio/region_mean": 0.0010989459515258204,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3737.0,
      "completions/mean_length": 754.0535888671875,
      "completions/mean_terminated_length": 606.0419921875,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 4.466472303206997,
      "grad_norm": 0.19907844066619873,
      "learning_rate": 1e-06,
      "loss": -0.0284,
      "num_tokens": 276812321.0,
      "reward": 0.637276828289032,
      "reward_std": 0.16559115052223206,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 478
    },
    {
      "clip_ratio/high_max": 0.0021917228477832396,
      "clip_ratio/high_mean": 0.0007876925938035129,
      "clip_ratio/low_mean": 0.0006437472352445184,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014314398031274322,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3900.0,
      "completions/mean_length": 785.2188110351562,
      "completions/mean_terminated_length": 551.84228515625,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 4.475801749271137,
      "grad_norm": 0.26668596267700195,
      "learning_rate": 1e-06,
      "loss": -0.025,
      "num_tokens": 277364997.0,
      "reward": 0.6328125,
      "reward_std": 0.17584973573684692,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 479
    },
    {
      "clip_ratio/high_max": 0.0014978326744312653,
      "clip_ratio/high_mean": 0.000552147514099488,
      "clip_ratio/low_mean": 0.000527881029938726,
      "clip_ratio/low_min": 1.577884358994197e-05,
      "clip_ratio/region_mean": 0.0010800285435834667,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3816.0,
      "completions/mean_length": 848.4721069335938,
      "completions/mean_terminated_length": 581.7669067382812,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 4.485131195335277,
      "grad_norm": 0.1616966277360916,
      "learning_rate": 1e-06,
      "loss": -0.0252,
      "num_tokens": 277940524.0,
      "reward": 0.6015625,
      "reward_std": 0.14943841099739075,
      "rewards/verify_math_reward/mean": 0.6015625,
      "rewards/verify_math_reward/std": 0.48984986543655396,
      "step": 480
    },
    {
      "clip_ratio/high_max": 0.0022573780624952633,
      "clip_ratio/high_mean": 0.0007787540616845945,
      "clip_ratio/low_mean": 0.0006733808249919093,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014521348712150939,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3029.0,
      "completions/mean_length": 739.8917846679688,
      "completions/mean_terminated_length": 490.3968811035156,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 4.494460641399417,
      "grad_norm": 0.20210717618465424,
      "learning_rate": 1e-06,
      "loss": -0.0121,
      "num_tokens": 278436291.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.14766854047775269,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.4846802353858948,
      "step": 481
    },
    {
      "clip_ratio/high_max": 0.0012219393265695544,
      "clip_ratio/high_mean": 0.00044862418280899874,
      "clip_ratio/low_mean": 0.0005656630401063012,
      "clip_ratio/low_min": 1.3922922335041221e-05,
      "clip_ratio/region_mean": 0.001014287227008026,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4079.0,
      "completions/mean_length": 772.2678833007812,
      "completions/mean_terminated_length": 554.9013061523438,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 4.503790087463557,
      "grad_norm": 0.17671526968479156,
      "learning_rate": 1e-06,
      "loss": -0.0107,
      "num_tokens": 278995203.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.12685278058052063,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 482
    },
    {
      "clip_ratio/high_max": 0.0017179354799736757,
      "clip_ratio/high_mean": 0.0005674014209944289,
      "clip_ratio/low_mean": 0.00040528669796913164,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009726881180540659,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3602.0,
      "completions/mean_length": 722.7545166015625,
      "completions/mean_terminated_length": 489.2840270996094,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 4.513119533527696,
      "grad_norm": 0.16090349853038788,
      "learning_rate": 1e-06,
      "loss": -0.012,
      "num_tokens": 279485391.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.1107741966843605,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 483
    },
    {
      "clip_ratio/high_max": 0.0015326368047681171,
      "clip_ratio/high_mean": 0.0005107696488266811,
      "clip_ratio/low_mean": 0.0004591243396134814,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009698939938971307,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4038.0,
      "completions/mean_length": 817.7767944335938,
      "completions/mean_terminated_length": 603.3864135742188,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 4.522448979591837,
      "grad_norm": 0.16423842310905457,
      "learning_rate": 1e-06,
      "loss": -0.0391,
      "num_tokens": 280075135.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.13433623313903809,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 484
    },
    {
      "clip_ratio/high_max": 0.0011952998420383665,
      "clip_ratio/high_mean": 0.00033654845924502297,
      "clip_ratio/low_mean": 0.0004079132208971714,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007444616749125998,
      "completions/clipped_ratio": 0.0591517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1790.0,
      "completions/mean_length": 761.6004638671875,
      "completions/mean_terminated_length": 551.9644165039062,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 4.531778425655976,
      "grad_norm": 0.14998149871826172,
      "learning_rate": 1e-06,
      "loss": -0.0122,
      "num_tokens": 280633321.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.1043110266327858,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 485
    },
    {
      "clip_ratio/high_max": 0.0013147492645657621,
      "clip_ratio/high_mean": 0.0004685137801061501,
      "clip_ratio/low_mean": 0.00047560554958181456,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009441193396924064,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2635.0,
      "completions/mean_length": 647.2433471679688,
      "completions/mean_terminated_length": 511.21343994140625,
      "completions/min_length": 177.0,
      "completions/min_terminated_length": 177.0,
      "epoch": 4.541107871720117,
      "grad_norm": 0.17931948602199554,
      "learning_rate": 1e-06,
      "loss": -0.0169,
      "num_tokens": 281152339.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.1504134237766266,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.47942501306533813,
      "step": 486
    },
    {
      "clip_ratio/high_max": 0.0018322935429750942,
      "clip_ratio/high_mean": 0.0007823343203199329,
      "clip_ratio/low_mean": 0.0005496792377925885,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013320135403773747,
      "completions/clipped_ratio": 0.056919642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3771.0,
      "completions/mean_length": 763.3895263671875,
      "completions/mean_terminated_length": 562.2496948242188,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 4.550437317784256,
      "grad_norm": 0.2100006639957428,
      "learning_rate": 1e-06,
      "loss": -0.0256,
      "num_tokens": 281713448.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.1673588901758194,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 487
    },
    {
      "clip_ratio/high_max": 0.0017669797198323067,
      "clip_ratio/high_mean": 0.0006832164363004267,
      "clip_ratio/low_mean": 0.0004076522900504642,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010908686999755446,
      "completions/clipped_ratio": 0.0457589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4013.0,
      "completions/mean_length": 693.9553833007812,
      "completions/mean_terminated_length": 530.8163452148438,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 4.559766763848397,
      "grad_norm": 0.21150177717208862,
      "learning_rate": 1e-06,
      "loss": -0.0023,
      "num_tokens": 282253192.0,
      "reward": 0.7265625596046448,
      "reward_std": 0.16081610321998596,
      "rewards/verify_math_reward/mean": 0.7265625,
      "rewards/verify_math_reward/std": 0.4459724426269531,
      "step": 488
    },
    {
      "clip_ratio/high_max": 0.001862763430835912,
      "clip_ratio/high_mean": 0.0006479796838902985,
      "clip_ratio/low_mean": 0.0004711603437499434,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001119140040827915,
      "completions/clipped_ratio": 0.0636160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3180.0,
      "completions/mean_length": 785.8516235351562,
      "completions/mean_terminated_length": 560.9666137695312,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 4.569096209912536,
      "grad_norm": 0.23946093022823334,
      "learning_rate": 1e-06,
      "loss": -0.0165,
      "num_tokens": 282817283.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.15195181965827942,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 489
    },
    {
      "clip_ratio/high_max": 0.0019186276003893,
      "clip_ratio/high_mean": 0.0006966680484765675,
      "clip_ratio/low_mean": 0.0003403620912649785,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010370301097282209,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2050.0,
      "completions/mean_length": 811.4029541015625,
      "completions/mean_terminated_length": 562.9879760742188,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 4.578425655976677,
      "grad_norm": 0.16684193909168243,
      "learning_rate": 1e-06,
      "loss": -0.0567,
      "num_tokens": 283383804.0,
      "reward": 0.598214328289032,
      "reward_std": 0.13831600546836853,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 490
    },
    {
      "clip_ratio/high_max": 0.0020056581270182505,
      "clip_ratio/high_mean": 0.0006818503643444274,
      "clip_ratio/low_mean": 0.000503500452850858,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011853508403874002,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4035.0,
      "completions/mean_length": 697.3326416015625,
      "completions/mean_terminated_length": 538.516357421875,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 4.587755102040816,
      "grad_norm": 0.2001650631427765,
      "learning_rate": 1e-06,
      "loss": -0.0028,
      "num_tokens": 283930614.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.1457175612449646,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 491
    },
    {
      "clip_ratio/high_max": 0.0019435344038356561,
      "clip_ratio/high_mean": 0.0007646160665899515,
      "clip_ratio/low_mean": 0.0006633842331211781,
      "clip_ratio/low_min": 1.9512955987011082e-05,
      "clip_ratio/region_mean": 0.0014280002433224581,
      "completions/clipped_ratio": 0.060267857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3367.0,
      "completions/mean_length": 769.6439819335938,
      "completions/mean_terminated_length": 556.3147583007812,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 4.597084548104956,
      "grad_norm": 0.18549324572086334,
      "learning_rate": 1e-06,
      "loss": -0.0279,
      "num_tokens": 284487335.0,
      "reward": 0.637276828289032,
      "reward_std": 0.18013371527194977,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 492
    },
    {
      "clip_ratio/high_max": 0.0016129888463183306,
      "clip_ratio/high_mean": 0.0006472732002293924,
      "clip_ratio/low_mean": 0.00040173178149416344,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010490049608051777,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3518.0,
      "completions/mean_length": 748.7064819335938,
      "completions/mean_terminated_length": 529.7990112304688,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 4.606413994169096,
      "grad_norm": 0.18927405774593353,
      "learning_rate": 1e-06,
      "loss": -0.0339,
      "num_tokens": 285018688.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.13211314380168915,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161848425865173,
      "step": 493
    },
    {
      "clip_ratio/high_max": 0.002222860977781238,
      "clip_ratio/high_mean": 0.0008036092076508794,
      "clip_ratio/low_mean": 0.0005098076007925556,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013134168002579827,
      "completions/clipped_ratio": 0.0401785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3682.0,
      "completions/mean_length": 709.0982666015625,
      "completions/mean_terminated_length": 567.3209228515625,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 4.615743440233236,
      "grad_norm": 0.2239789068698883,
      "learning_rate": 1e-06,
      "loss": -0.0127,
      "num_tokens": 285585968.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.16953669488430023,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 494
    },
    {
      "clip_ratio/high_max": 0.001853333531471435,
      "clip_ratio/high_mean": 0.000694443686370505,
      "clip_ratio/low_mean": 0.0005493978615049855,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012438415360520594,
      "completions/clipped_ratio": 0.0546875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2817.0,
      "completions/mean_length": 715.4219360351562,
      "completions/mean_terminated_length": 519.8511962890625,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 4.625072886297376,
      "grad_norm": 0.26524800062179565,
      "learning_rate": 1e-06,
      "loss": -0.0184,
      "num_tokens": 286111914.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.17765070497989655,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 495
    },
    {
      "clip_ratio/high_max": 0.001967742820852436,
      "clip_ratio/high_mean": 0.000702927791280672,
      "clip_ratio/low_mean": 0.000588283081924601,
      "clip_ratio/low_min": 3.1709791073808447e-05,
      "clip_ratio/region_mean": 0.0012912108650198206,
      "completions/clipped_ratio": 0.060267857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1882.0,
      "completions/mean_length": 764.1172485351562,
      "completions/mean_terminated_length": 550.4335327148438,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 4.634402332361516,
      "grad_norm": 0.22960741817951202,
      "learning_rate": 1e-06,
      "loss": -0.0181,
      "num_tokens": 286667139.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.17818482220172882,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 496
    },
    {
      "clip_ratio/high_max": 0.00152768318366725,
      "clip_ratio/high_mean": 0.000530095907379291,
      "clip_ratio/low_mean": 0.0005729503295697214,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011030462264898233,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2759.0,
      "completions/mean_length": 602.1283569335938,
      "completions/mean_terminated_length": 518.2754516601562,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 4.643731778425656,
      "grad_norm": 5.484728813171387,
      "learning_rate": 1e-06,
      "loss": -0.0071,
      "num_tokens": 287203462.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.1493610441684723,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219157218933105,
      "step": 497
    },
    {
      "clip_ratio/high_max": 0.0015428415281348862,
      "clip_ratio/high_mean": 0.0006701672446070006,
      "clip_ratio/low_mean": 0.0006359260405588429,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001306093286984833,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3899.0,
      "completions/mean_length": 799.4766235351562,
      "completions/mean_terminated_length": 571.3162231445312,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 4.653061224489796,
      "grad_norm": 0.21233105659484863,
      "learning_rate": 1e-06,
      "loss": -0.0254,
      "num_tokens": 287771761.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.17066533863544464,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975653409957886,
      "step": 498
    },
    {
      "clip_ratio/high_max": 0.0018094013903464656,
      "clip_ratio/high_mean": 0.000651849810310523,
      "clip_ratio/low_mean": 0.0004923005108139478,
      "clip_ratio/low_min": 1.493072159064468e-05,
      "clip_ratio/region_mean": 0.001144150308391545,
      "completions/clipped_ratio": 0.060267857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3224.0,
      "completions/mean_length": 759.8828735351562,
      "completions/mean_terminated_length": 545.9275512695312,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 4.662390670553936,
      "grad_norm": 0.361331045627594,
      "learning_rate": 1e-06,
      "loss": -0.0146,
      "num_tokens": 288322464.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.17498140037059784,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 499
    },
    {
      "clip_ratio/high_max": 0.0023586605821037665,
      "clip_ratio/high_mean": 0.0008542319155822042,
      "clip_ratio/low_mean": 0.00040829024374033906,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012625221606867854,
      "completions/clipped_ratio": 0.0546875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3962.0,
      "completions/mean_length": 736.6105346679688,
      "completions/mean_terminated_length": 542.265625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 4.671720116618076,
      "grad_norm": 0.197487935423851,
      "learning_rate": 1e-06,
      "loss": -0.0349,
      "num_tokens": 288872907.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.14342734217643738,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 500
    },
    {
      "clip_ratio/high_max": 0.002165036043152213,
      "clip_ratio/high_mean": 0.0008253226878878195,
      "clip_ratio/low_mean": 0.00044876248966829735,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012740851434500655,
      "completions/clipped_ratio": 0.0513392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3568.0,
      "completions/mean_length": 743.1082763671875,
      "completions/mean_terminated_length": 561.6576538085938,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 4.681049562682215,
      "grad_norm": 0.19396717846393585,
      "learning_rate": 1e-06,
      "loss": -0.0223,
      "num_tokens": 289445516.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.1599937528371811,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.48841196298599243,
      "step": 501
    },
    {
      "clip_ratio/high_max": 0.0016229326211032458,
      "clip_ratio/high_mean": 0.0006127836386440322,
      "clip_ratio/low_mean": 0.0005866753099326161,
      "clip_ratio/low_min": 5.1067519962089136e-05,
      "clip_ratio/region_mean": 0.0011994589513051324,
      "completions/clipped_ratio": 0.056919642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2858.0,
      "completions/mean_length": 760.575927734375,
      "completions/mean_terminated_length": 559.2662963867188,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 4.690379008746356,
      "grad_norm": 0.23123548924922943,
      "learning_rate": 1e-06,
      "loss": -0.0037,
      "num_tokens": 290016672.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.1706332564353943,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 502
    },
    {
      "clip_ratio/high_max": 0.001514357709311298,
      "clip_ratio/high_mean": 0.0006434163278754568,
      "clip_ratio/low_mean": 0.0005111019117975957,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011545181987457909,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4013.0,
      "completions/mean_length": 859.4185791015625,
      "completions/mean_terminated_length": 559.4426879882812,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 4.699708454810495,
      "grad_norm": 0.21300344169139862,
      "learning_rate": 1e-06,
      "loss": -0.0349,
      "num_tokens": 290569695.0,
      "reward": 0.590401828289032,
      "reward_std": 0.16848641633987427,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 503
    },
    {
      "clip_ratio/high_max": 0.0020721697710541775,
      "clip_ratio/high_mean": 0.000666506194647809,
      "clip_ratio/low_mean": 0.0004435453745372797,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011100515730504412,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3831.0,
      "completions/mean_length": 820.2589721679688,
      "completions/mean_terminated_length": 576.7386474609375,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 4.709037900874636,
      "grad_norm": 0.21655043959617615,
      "learning_rate": 1e-06,
      "loss": -0.0233,
      "num_tokens": 291142287.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.15326520800590515,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 504
    },
    {
      "clip_ratio/high_max": 0.001905234094010666,
      "clip_ratio/high_mean": 0.0006369381944750785,
      "clip_ratio/low_mean": 0.0003795098900809535,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010164481063839048,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2177.0,
      "completions/mean_length": 817.1864013671875,
      "completions/mean_terminated_length": 547.9118041992188,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 4.718367346938775,
      "grad_norm": 0.18194395303726196,
      "learning_rate": 1e-06,
      "loss": -0.0356,
      "num_tokens": 291690726.0,
      "reward": 0.5859375,
      "reward_std": 0.14248163998126984,
      "rewards/verify_math_reward/mean": 0.5859375,
      "rewards/verify_math_reward/std": 0.4928344786167145,
      "step": 505
    },
    {
      "clip_ratio/high_max": 0.001590982086781878,
      "clip_ratio/high_mean": 0.0005425151657618699,
      "clip_ratio/low_mean": 0.00047243583435374603,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001014950994431274,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3204.0,
      "completions/mean_length": 796.4241333007812,
      "completions/mean_terminated_length": 568.0525512695312,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 4.727696793002916,
      "grad_norm": 0.1851402372121811,
      "learning_rate": 1e-06,
      "loss": -0.0247,
      "num_tokens": 292256266.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.14628097414970398,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 506
    },
    {
      "clip_ratio/high_max": 0.001641770315472968,
      "clip_ratio/high_mean": 0.0006798308804718545,
      "clip_ratio/low_mean": 0.0006032846877133125,
      "clip_ratio/low_min": 1.2972187505511101e-05,
      "clip_ratio/region_mean": 0.001283115561818704,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2595.0,
      "completions/mean_length": 667.0178833007812,
      "completions/mean_terminated_length": 506.7850341796875,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 4.737026239067055,
      "grad_norm": 0.2165297269821167,
      "learning_rate": 1e-06,
      "loss": 0.0025,
      "num_tokens": 292788186.0,
      "reward": 0.65625,
      "reward_std": 0.16393959522247314,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 507
    },
    {
      "clip_ratio/high_max": 0.0019648484958452173,
      "clip_ratio/high_mean": 0.0007177483748819213,
      "clip_ratio/low_mean": 0.0004973882305421284,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012151366099715233,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3965.0,
      "completions/mean_length": 858.8471069335938,
      "completions/mean_terminated_length": 580.2557373046875,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 4.746355685131196,
      "grad_norm": 0.20138254761695862,
      "learning_rate": 1e-06,
      "loss": -0.0384,
      "num_tokens": 293361609.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.16607898473739624,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 508
    },
    {
      "clip_ratio/high_max": 0.0017334266631223727,
      "clip_ratio/high_mean": 0.0006477061779150972,
      "clip_ratio/low_mean": 0.0006191994889377384,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012669056486629415,
      "completions/clipped_ratio": 0.0580357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3945.0,
      "completions/mean_length": 744.8158569335938,
      "completions/mean_terminated_length": 538.3447875976562,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 4.755685131195335,
      "grad_norm": 0.23863951861858368,
      "learning_rate": 1e-06,
      "loss": -0.0339,
      "num_tokens": 293909596.0,
      "reward": 0.625,
      "reward_std": 0.17495255172252655,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 509
    },
    {
      "clip_ratio/high_max": 0.002056710440228926,
      "clip_ratio/high_mean": 0.0007373519492830383,
      "clip_ratio/low_mean": 0.0004097512669432035,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011471032375993673,
      "completions/clipped_ratio": 0.060267857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2530.0,
      "completions/mean_length": 779.4006958007812,
      "completions/mean_terminated_length": 566.6972045898438,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 4.765014577259475,
      "grad_norm": 0.20938974618911743,
      "learning_rate": 1e-06,
      "loss": -0.0249,
      "num_tokens": 294471355.0,
      "reward": 0.6986607313156128,
      "reward_std": 0.17659832537174225,
      "rewards/verify_math_reward/mean": 0.6986607313156128,
      "rewards/verify_math_reward/std": 0.4590960443019867,
      "step": 510
    },
    {
      "clip_ratio/high_max": 0.0019161519703629892,
      "clip_ratio/high_mean": 0.0007846968983358238,
      "clip_ratio/low_mean": 0.0004255761336935393,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012102730470360257,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3427.0,
      "completions/mean_length": 827.9163208007812,
      "completions/mean_terminated_length": 597.549560546875,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 4.774344023323615,
      "grad_norm": 0.20174439251422882,
      "learning_rate": 1e-06,
      "loss": -0.0296,
      "num_tokens": 295074488.0,
      "reward": 0.609375,
      "reward_std": 0.18141572177410126,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 511
    },
    {
      "clip_ratio/high_max": 0.001628550222449121,
      "clip_ratio/high_mean": 0.0005664115392391977,
      "clip_ratio/low_mean": 0.0005180246389500098,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010844361822819337,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3729.0,
      "completions/mean_length": 661.0703125,
      "completions/mean_terminated_length": 500.5595703125,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 4.783673469387755,
      "grad_norm": 0.188065305352211,
      "learning_rate": 1e-06,
      "loss": -0.0082,
      "num_tokens": 295588015.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.12790516018867493,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 512
    },
    {
      "clip_ratio/high_max": 0.0017988044128287584,
      "clip_ratio/high_mean": 0.0007507897207688075,
      "clip_ratio/low_mean": 0.0004354124384917668,
      "clip_ratio/low_min": 1.3640331417263951e-05,
      "clip_ratio/region_mean": 0.001186202160170069,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2530.0,
      "completions/mean_length": 697.2567138671875,
      "completions/mean_terminated_length": 538.4368896484375,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 4.793002915451895,
      "grad_norm": 1.1061233282089233,
      "learning_rate": 1e-06,
      "loss": -0.0197,
      "num_tokens": 296141229.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.15980830788612366,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.49075525999069214,
      "step": 513
    },
    {
      "clip_ratio/high_max": 0.0017655004703556187,
      "clip_ratio/high_mean": 0.0006406353877537185,
      "clip_ratio/low_mean": 0.0004561484574878705,
      "clip_ratio/low_min": 2.1856967578059994e-05,
      "clip_ratio/region_mean": 0.0010967838352371473,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4092.0,
      "completions/mean_length": 787.3995971679688,
      "completions/mean_terminated_length": 571.0225830078125,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 4.802332361516035,
      "grad_norm": 0.23085437715053558,
      "learning_rate": 1e-06,
      "loss": -0.0014,
      "num_tokens": 296702347.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.18193678557872772,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 514
    },
    {
      "clip_ratio/high_max": 0.0018188555113738403,
      "clip_ratio/high_mean": 0.0006787879956391407,
      "clip_ratio/low_mean": 0.00048321402209694497,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011620020341069903,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3307.0,
      "completions/mean_length": 841.5502319335938,
      "completions/mean_terminated_length": 552.8809204101562,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 4.811661807580175,
      "grad_norm": 0.28857406973838806,
      "learning_rate": 1e-06,
      "loss": -0.0143,
      "num_tokens": 297254144.0,
      "reward": 0.606026828289032,
      "reward_std": 0.15826597809791565,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 515
    },
    {
      "clip_ratio/high_max": 0.002027556700340938,
      "clip_ratio/high_mean": 0.0008020605837373296,
      "clip_ratio/low_mean": 0.0007501269938074984,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0015521875757258385,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3265.0,
      "completions/mean_length": 826.0513916015625,
      "completions/mean_terminated_length": 566.0313110351562,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 4.820991253644315,
      "grad_norm": 0.21496886014938354,
      "learning_rate": 1e-06,
      "loss": -0.0344,
      "num_tokens": 297818046.0,
      "reward": 0.578125,
      "reward_std": 0.19531114399433136,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 516
    },
    {
      "clip_ratio/high_max": 0.0018064766736642923,
      "clip_ratio/high_mean": 0.000677952555633965,
      "clip_ratio/low_mean": 0.0006463891731982585,
      "clip_ratio/low_min": 5.396787673817016e-05,
      "clip_ratio/region_mean": 0.001324341727013234,
      "completions/clipped_ratio": 0.0401785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3644.0,
      "completions/mean_length": 719.2422485351562,
      "completions/mean_terminated_length": 577.8895263671875,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 4.830320699708455,
      "grad_norm": 0.21425247192382812,
      "learning_rate": 1e-06,
      "loss": -0.0029,
      "num_tokens": 298403903.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.18024571239948273,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.4876568913459778,
      "step": 517
    },
    {
      "clip_ratio/high_max": 0.0017812663081713254,
      "clip_ratio/high_mean": 0.000571148573044411,
      "clip_ratio/low_mean": 0.00040367850806433125,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009748270931595471,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3386.0,
      "completions/mean_length": 837.9576416015625,
      "completions/mean_terminated_length": 544.6544799804688,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 4.839650145772595,
      "grad_norm": 0.31801754236221313,
      "learning_rate": 1e-06,
      "loss": -0.0146,
      "num_tokens": 298933921.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.12629005312919617,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 518
    },
    {
      "clip_ratio/high_max": 0.0019636725628515705,
      "clip_ratio/high_mean": 0.0006848009579698555,
      "clip_ratio/low_mean": 0.00046225982123360154,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011470608005765826,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4004.0,
      "completions/mean_length": 841.7678833007812,
      "completions/mean_terminated_length": 565.9854736328125,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 4.848979591836734,
      "grad_norm": 0.19902126491069794,
      "learning_rate": 1e-06,
      "loss": -0.0442,
      "num_tokens": 299484529.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.1479395478963852,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 519
    },
    {
      "clip_ratio/high_max": 0.0015951618261169642,
      "clip_ratio/high_mean": 0.0005832678198203212,
      "clip_ratio/low_mean": 0.0004987134489056189,
      "clip_ratio/low_min": 2.4127414690156e-05,
      "clip_ratio/region_mean": 0.0010819812559930142,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3452.0,
      "completions/mean_length": 831.3973388671875,
      "completions/mean_terminated_length": 588.705078125,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 4.858309037900875,
      "grad_norm": 0.17633138597011566,
      "learning_rate": 1e-06,
      "loss": -0.0219,
      "num_tokens": 300063341.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.15060842037200928,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 520
    },
    {
      "clip_ratio/high_max": 0.0016294401102641132,
      "clip_ratio/high_mean": 0.000635173610135098,
      "clip_ratio/low_mean": 0.0005369109121602378,
      "clip_ratio/low_min": 5.691649221262196e-06,
      "clip_ratio/region_mean": 0.001172084534118767,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4018.0,
      "completions/mean_length": 778.2355346679688,
      "completions/mean_terminated_length": 582.150146484375,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 4.867638483965014,
      "grad_norm": 0.19363398849964142,
      "learning_rate": 1e-06,
      "loss": -0.0226,
      "num_tokens": 300648728.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.1528654545545578,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.48291724920272827,
      "step": 521
    },
    {
      "clip_ratio/high_max": 0.0020411835721461102,
      "clip_ratio/high_mean": 0.0007113227475201711,
      "clip_ratio/low_mean": 0.0004420190798555268,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011533418255567085,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2730.0,
      "completions/mean_length": 685.357177734375,
      "completions/mean_terminated_length": 513.425537109375,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 4.876967930029155,
      "grad_norm": 0.212229385972023,
      "learning_rate": 1e-06,
      "loss": -0.0367,
      "num_tokens": 301171648.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.15492630004882812,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.4628615975379944,
      "step": 522
    },
    {
      "clip_ratio/high_max": 0.0016648603341309354,
      "clip_ratio/high_mean": 0.0007063466109684668,
      "clip_ratio/low_mean": 0.0005030818419982097,
      "clip_ratio/low_min": 2.4855627088982146e-05,
      "clip_ratio/region_mean": 0.0012094284647901077,
      "completions/clipped_ratio": 0.0546875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3781.0,
      "completions/mean_length": 758.6417846679688,
      "completions/mean_terminated_length": 565.5714111328125,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 4.886297376093294,
      "grad_norm": 0.1821240335702896,
      "learning_rate": 1e-06,
      "loss": -0.0246,
      "num_tokens": 301747087.0,
      "reward": 0.582589328289032,
      "reward_std": 0.17746664583683014,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.493407279253006,
      "step": 523
    },
    {
      "clip_ratio/high_max": 0.001530692014057422,
      "clip_ratio/high_mean": 0.0005710766499760211,
      "clip_ratio/low_mean": 0.00046945452277213917,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001040531156832003,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3729.0,
      "completions/mean_length": 776.4832763671875,
      "completions/mean_terminated_length": 521.1358642578125,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 4.895626822157435,
      "grad_norm": 0.1970164030790329,
      "learning_rate": 1e-06,
      "loss": -0.0284,
      "num_tokens": 302271976.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.12985865771770477,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 524
    },
    {
      "clip_ratio/high_max": 0.0016025000186346006,
      "clip_ratio/high_mean": 0.000574866196984658,
      "clip_ratio/low_mean": 0.0005449543987197103,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011198205829714425,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1774.0,
      "completions/mean_length": 858.8549194335938,
      "completions/mean_terminated_length": 597.2279663085938,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 4.904956268221574,
      "grad_norm": 0.1987365186214447,
      "learning_rate": 1e-06,
      "loss": -0.0176,
      "num_tokens": 302863254.0,
      "reward": 0.535714328289032,
      "reward_std": 0.15793287754058838,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 525
    },
    {
      "clip_ratio/high_max": 0.0018846390703401994,
      "clip_ratio/high_mean": 0.0007924009560156264,
      "clip_ratio/low_mean": 0.0005120176338095916,
      "clip_ratio/low_min": 1.2415574019541964e-05,
      "clip_ratio/region_mean": 0.0013044185761827976,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4088.0,
      "completions/mean_length": 919.724365234375,
      "completions/mean_terminated_length": 560.6670532226562,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 4.914285714285715,
      "grad_norm": 0.2049759328365326,
      "learning_rate": 1e-06,
      "loss": -0.0363,
      "num_tokens": 303406407.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.18069963157176971,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 526
    },
    {
      "clip_ratio/high_max": 0.0016742554980737623,
      "clip_ratio/high_mean": 0.0005448062183859292,
      "clip_ratio/low_mean": 0.00034900177320196235,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008938079972722335,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3889.0,
      "completions/mean_length": 818.396240234375,
      "completions/mean_terminated_length": 549.2210083007812,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 4.923615160349854,
      "grad_norm": 0.1686515510082245,
      "learning_rate": 1e-06,
      "loss": -0.0217,
      "num_tokens": 303950626.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.11114510893821716,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 527
    },
    {
      "clip_ratio/high_max": 0.0016543881138204597,
      "clip_ratio/high_mean": 0.000614532134932233,
      "clip_ratio/low_mean": 0.0004907750080747064,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011053071539208759,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3884.0,
      "completions/mean_length": 819.8125610351562,
      "completions/mean_terminated_length": 588.87451171875,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 4.932944606413994,
      "grad_norm": 0.18196366727352142,
      "learning_rate": 1e-06,
      "loss": -0.0343,
      "num_tokens": 304532546.0,
      "reward": 0.621651828289032,
      "reward_std": 0.1621701568365097,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 528
    },
    {
      "clip_ratio/high_max": 0.001804248575353995,
      "clip_ratio/high_mean": 0.0006423912036552792,
      "clip_ratio/low_mean": 0.0003004134805451031,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000942804694204824,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3302.0,
      "completions/mean_length": 770.8348388671875,
      "completions/mean_terminated_length": 532.1865844726562,
      "completions/min_length": 186.0,
      "completions/min_terminated_length": 186.0,
      "epoch": 4.942274052478134,
      "grad_norm": 0.19462347030639648,
      "learning_rate": 1e-06,
      "loss": -0.0328,
      "num_tokens": 305057566.0,
      "reward": 0.645089328289032,
      "reward_std": 0.14196309447288513,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 529
    },
    {
      "clip_ratio/high_max": 0.0017251281169592403,
      "clip_ratio/high_mean": 0.0006889636570122093,
      "clip_ratio/low_mean": 0.0004455980179045582,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001134561694925651,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4061.0,
      "completions/mean_length": 817.2756958007812,
      "completions/mean_terminated_length": 573.5335693359375,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 4.9516034985422746,
      "grad_norm": 0.2360532432794571,
      "learning_rate": 1e-06,
      "loss": -0.0155,
      "num_tokens": 305625701.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.15022864937782288,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 530
    },
    {
      "clip_ratio/high_max": 0.001609930634003831,
      "clip_ratio/high_mean": 0.0005716356172342785,
      "clip_ratio/low_mean": 0.00044805481638832134,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010196904331678525,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3932.0,
      "completions/mean_length": 756.1261596679688,
      "completions/mean_terminated_length": 516.4222412109375,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 4.960932944606414,
      "grad_norm": 0.2021724432706833,
      "learning_rate": 1e-06,
      "loss": -0.0272,
      "num_tokens": 306141678.0,
      "reward": 0.7008928656578064,
      "reward_std": 0.13200366497039795,
      "rewards/verify_math_reward/mean": 0.7008928656578064,
      "rewards/verify_math_reward/std": 0.458122581243515,
      "step": 531
    },
    {
      "clip_ratio/high_max": 0.001937603625265183,
      "clip_ratio/high_mean": 0.0006960711689316668,
      "clip_ratio/low_mean": 0.0006089017770136707,
      "clip_ratio/low_min": 3.501400715322234e-05,
      "clip_ratio/region_mean": 0.0013049729568592738,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1958.0,
      "completions/mean_length": 776.3616333007812,
      "completions/mean_terminated_length": 546.6014404296875,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 4.970262390670554,
      "grad_norm": 0.23575113713741302,
      "learning_rate": 1e-06,
      "loss": -0.0192,
      "num_tokens": 306679418.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.16972285509109497,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 532
    },
    {
      "clip_ratio/high_max": 0.0018032093175861519,
      "clip_ratio/high_mean": 0.000643911968836619,
      "clip_ratio/low_mean": 0.0004977471126039745,
      "clip_ratio/low_min": 3.773394928430207e-05,
      "clip_ratio/region_mean": 0.001141659085988067,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3844.0,
      "completions/mean_length": 793.4542846679688,
      "completions/mean_terminated_length": 556.4293823242188,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 4.979591836734694,
      "grad_norm": 0.19462807476520538,
      "learning_rate": 1e-06,
      "loss": -0.038,
      "num_tokens": 307237297.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.14057300984859467,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865824937820435,
      "step": 533
    },
    {
      "clip_ratio/high_max": 0.0015770237005199306,
      "clip_ratio/high_mean": 0.0004682087228502496,
      "clip_ratio/low_mean": 0.0003231561415759643,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007913648523754091,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2732.0,
      "completions/mean_length": 798.2500610351562,
      "completions/mean_terminated_length": 557.3365478515625,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 4.988921282798834,
      "grad_norm": 2.719938278198242,
      "learning_rate": 1e-06,
      "loss": -0.0379,
      "num_tokens": 307794881.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.12729713320732117,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 534
    },
    {
      "clip_ratio/high_max": 0.0019295079473522492,
      "clip_ratio/high_mean": 0.0006116930326243164,
      "clip_ratio/low_mean": 0.00046655322330479976,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010782462632050738,
      "completions/clipped_ratio": 0.06534090909090906,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1694.0,
      "completions/mean_length": 743.0823974609375,
      "completions/mean_terminated_length": 508.68389892578125,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 4.998250728862974,
      "grad_norm": 0.17135488986968994,
      "learning_rate": 1e-06,
      "loss": -0.0213,
      "num_tokens": 308343258.0,
      "reward": 0.6328125,
      "reward_std": 0.12798002362251282,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 535
    },
    {
      "clip_ratio/high_max": 0.001734473873511888,
      "clip_ratio/high_mean": 0.0007146760399336927,
      "clip_ratio/low_mean": 0.0005809196209156653,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012955956481164321,
      "completions/clipped_ratio": 0.049107142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3428.0,
      "completions/mean_length": 738.5892944335938,
      "completions/mean_terminated_length": 565.201904296875,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 5.0093294460641395,
      "grad_norm": 0.21436621248722076,
      "learning_rate": 1e-06,
      "loss": -0.0377,
      "num_tokens": 308902402.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.1736377328634262,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 536
    },
    {
      "clip_ratio/high_max": 0.0015680753713240847,
      "clip_ratio/high_mean": 0.0006392968753061723,
      "clip_ratio/low_mean": 0.0004740121639770223,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011133090265502688,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4003.0,
      "completions/mean_length": 805.2689819335938,
      "completions/mean_terminated_length": 590.0606079101562,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 5.01865889212828,
      "grad_norm": 0.19323799014091492,
      "learning_rate": 1e-06,
      "loss": -0.0244,
      "num_tokens": 309480363.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.16675862669944763,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321895837783813,
      "step": 537
    },
    {
      "clip_ratio/high_max": 0.0018389608048892114,
      "clip_ratio/high_mean": 0.0006140505993244005,
      "clip_ratio/low_mean": 0.0004651449494303961,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010791955355671234,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3725.0,
      "completions/mean_length": 924.3047485351562,
      "completions/mean_terminated_length": 596.1982421875,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 5.0279883381924195,
      "grad_norm": 0.16805453598499298,
      "learning_rate": 1e-06,
      "loss": -0.0374,
      "num_tokens": 310054836.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.15446916222572327,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.49075525999069214,
      "step": 538
    },
    {
      "clip_ratio/high_max": 0.0019854991260217503,
      "clip_ratio/high_mean": 0.0005954767184448428,
      "clip_ratio/low_mean": 0.00045045749357086606,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001045934248395497,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3103.0,
      "completions/mean_length": 811.9486694335938,
      "completions/mean_terminated_length": 524.9927368164062,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 5.03731778425656,
      "grad_norm": 0.1802738904953003,
      "learning_rate": 1e-06,
      "loss": -0.0488,
      "num_tokens": 310580038.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.12181992828845978,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 539
    },
    {
      "clip_ratio/high_max": 0.0016207788430619985,
      "clip_ratio/high_mean": 0.0006753572306479327,
      "clip_ratio/low_mean": 0.00038364015745173674,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010589973753667437,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3873.0,
      "completions/mean_length": 692.3460083007812,
      "completions/mean_terminated_length": 562.1946411132812,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 5.0466472303206995,
      "grad_norm": 0.25847533345222473,
      "learning_rate": 1e-06,
      "loss": -0.0239,
      "num_tokens": 311148324.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.15207336843013763,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.4600565731525421,
      "step": 540
    },
    {
      "clip_ratio/high_max": 0.0020743900931847747,
      "clip_ratio/high_mean": 0.0007518874990637414,
      "clip_ratio/low_mean": 0.0005199604274821468,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012718479047180153,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3514.0,
      "completions/mean_length": 792.4877319335938,
      "completions/mean_terminated_length": 521.1847534179688,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 5.05597667638484,
      "grad_norm": 0.24380233883857727,
      "learning_rate": 1e-06,
      "loss": -0.0229,
      "num_tokens": 311662793.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.14158332347869873,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219160199165344,
      "step": 541
    },
    {
      "clip_ratio/high_max": 0.001640480197238503,
      "clip_ratio/high_mean": 0.0006248580903047696,
      "clip_ratio/low_mean": 0.0006177023233249201,
      "clip_ratio/low_min": 1.4688601368106902e-05,
      "clip_ratio/region_mean": 0.0012425604181771632,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3033.0,
      "completions/mean_length": 766.1004638671875,
      "completions/mean_terminated_length": 544.107177734375,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 5.0653061224489795,
      "grad_norm": 0.2195175290107727,
      "learning_rate": 1e-06,
      "loss": -0.0115,
      "num_tokens": 312202379.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.16848823428153992,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 542
    },
    {
      "clip_ratio/high_max": 0.0018057929955830332,
      "clip_ratio/high_mean": 0.0005769770186816459,
      "clip_ratio/low_mean": 0.0005559519349844777,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011329289663990494,
      "completions/clipped_ratio": 0.0502232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1964.0,
      "completions/mean_length": 703.802490234375,
      "completions/mean_terminated_length": 524.4265747070312,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 5.07463556851312,
      "grad_norm": 0.19934502243995667,
      "learning_rate": 1e-06,
      "loss": -0.0139,
      "num_tokens": 312736114.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.14969734847545624,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 543
    },
    {
      "clip_ratio/high_max": 0.0016377591200580355,
      "clip_ratio/high_mean": 0.000579360916162841,
      "clip_ratio/low_mean": 0.000343085593158321,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009224465247825719,
      "completions/clipped_ratio": 0.0725446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3808.0,
      "completions/mean_length": 803.0949096679688,
      "completions/mean_terminated_length": 545.527099609375,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 5.0839650145772595,
      "grad_norm": 0.1511351764202118,
      "learning_rate": 1e-06,
      "loss": -0.0122,
      "num_tokens": 313282951.0,
      "reward": 0.6328125,
      "reward_std": 0.11644896864891052,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 544
    },
    {
      "clip_ratio/high_max": 0.001759360806318,
      "clip_ratio/high_mean": 0.0006546797612827504,
      "clip_ratio/low_mean": 0.0005265207164484309,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011812004704552237,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3742.0,
      "completions/mean_length": 816.489990234375,
      "completions/mean_terminated_length": 585.3178100585938,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 5.093294460641399,
      "grad_norm": 0.33368080854415894,
      "learning_rate": 1e-06,
      "loss": -0.0391,
      "num_tokens": 313872118.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.17344370484352112,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943602085113525,
      "step": 545
    },
    {
      "clip_ratio/high_max": 0.0018291265150764957,
      "clip_ratio/high_mean": 0.0007576696643809555,
      "clip_ratio/low_mean": 0.00040248151799460175,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001160151183285052,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3986.0,
      "completions/mean_length": 733.5904541015625,
      "completions/mean_terminated_length": 534.866455078125,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 5.1026239067055394,
      "grad_norm": 0.1947004348039627,
      "learning_rate": 1e-06,
      "loss": -0.0241,
      "num_tokens": 314407583.0,
      "reward": 0.652901828289032,
      "reward_std": 0.15649862587451935,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631317377090454,
      "step": 546
    },
    {
      "clip_ratio/high_max": 0.0015986562939360738,
      "clip_ratio/high_mean": 0.0005538484174394398,
      "clip_ratio/low_mean": 0.0005457822521748312,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010996306664310396,
      "completions/clipped_ratio": 0.052455357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3710.0,
      "completions/mean_length": 740.5089721679688,
      "completions/mean_terminated_length": 554.75146484375,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 5.111953352769679,
      "grad_norm": 0.1526365578174591,
      "learning_rate": 1e-06,
      "loss": -0.0193,
      "num_tokens": 314968583.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.12433726340532303,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 547
    },
    {
      "clip_ratio/high_max": 0.0017323404536000453,
      "clip_ratio/high_mean": 0.0007162682832131395,
      "clip_ratio/low_mean": 0.00037293118566594785,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010891994679695927,
      "completions/clipped_ratio": 0.0502232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3698.0,
      "completions/mean_length": 717.3717041015625,
      "completions/mean_terminated_length": 538.7132568359375,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 5.121282798833819,
      "grad_norm": 0.21684935688972473,
      "learning_rate": 1e-06,
      "loss": -0.0189,
      "num_tokens": 315508996.0,
      "reward": 0.723214328289032,
      "reward_std": 0.1310625821352005,
      "rewards/verify_math_reward/mean": 0.7232142686843872,
      "rewards/verify_math_reward/std": 0.44765952229499817,
      "step": 548
    },
    {
      "clip_ratio/high_max": 0.0018178919526690152,
      "clip_ratio/high_mean": 0.0006247809515116387,
      "clip_ratio/low_mean": 0.0003424692895350745,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009672502528701443,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3700.0,
      "completions/mean_length": 788.0480346679688,
      "completions/mean_terminated_length": 592.5425415039062,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 5.130612244897959,
      "grad_norm": 0.1766895204782486,
      "learning_rate": 1e-06,
      "loss": -0.0381,
      "num_tokens": 316101279.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.1288476437330246,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 549
    },
    {
      "clip_ratio/high_max": 0.0015654471280868165,
      "clip_ratio/high_mean": 0.0005066074886599381,
      "clip_ratio/low_mean": 0.0005465031463245396,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010531106272537727,
      "completions/clipped_ratio": 0.0636160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3849.0,
      "completions/mean_length": 768.8225708007812,
      "completions/mean_terminated_length": 542.7807006835938,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 5.139941690962099,
      "grad_norm": 0.17811238765716553,
      "learning_rate": 1e-06,
      "loss": -0.0113,
      "num_tokens": 316655928.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.14722415804862976,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.49828118085861206,
      "step": 550
    },
    {
      "clip_ratio/high_max": 0.001819049855839694,
      "clip_ratio/high_mean": 0.0005607957336906111,
      "clip_ratio/low_mean": 0.00045058580462864484,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010113815314980457,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3800.0,
      "completions/mean_length": 792.5569458007812,
      "completions/mean_terminated_length": 529.87353515625,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 5.149271137026239,
      "grad_norm": 0.19947810471057892,
      "learning_rate": 1e-06,
      "loss": -0.0464,
      "num_tokens": 317184059.0,
      "reward": 0.6640625,
      "reward_std": 0.12982404232025146,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 551
    },
    {
      "clip_ratio/high_max": 0.0016915455998969264,
      "clip_ratio/high_mean": 0.0007980726641108049,
      "clip_ratio/low_mean": 0.00047955109675967833,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012776237817888614,
      "completions/clipped_ratio": 0.0546875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2457.0,
      "completions/mean_length": 781.3158569335938,
      "completions/mean_terminated_length": 589.5572509765625,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 5.158600583090379,
      "grad_norm": 0.20176364481449127,
      "learning_rate": 1e-06,
      "loss": -0.0401,
      "num_tokens": 317770694.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.18644897639751434,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 552
    },
    {
      "clip_ratio/high_max": 0.001715225946099963,
      "clip_ratio/high_mean": 0.0005189865278225625,
      "clip_ratio/low_mean": 0.0003097739145232481,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008287604505312629,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3892.0,
      "completions/mean_length": 768.2511596679688,
      "completions/mean_terminated_length": 546.4011840820312,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 5.167930029154519,
      "grad_norm": 0.15910810232162476,
      "learning_rate": 1e-06,
      "loss": -0.0162,
      "num_tokens": 318315895.0,
      "reward": 0.6484375,
      "reward_std": 0.1289571076631546,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 553
    },
    {
      "clip_ratio/high_max": 0.001746124053170206,
      "clip_ratio/high_mean": 0.0006517072833958082,
      "clip_ratio/low_mean": 0.0005152529715815035,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011669602390611544,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3371.0,
      "completions/mean_length": 858.1998291015625,
      "completions/mean_terminated_length": 596.5198974609375,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 5.1772594752186585,
      "grad_norm": 0.2465633898973465,
      "learning_rate": 1e-06,
      "loss": -0.0281,
      "num_tokens": 318897826.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.14429426193237305,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 554
    },
    {
      "clip_ratio/high_max": 0.0017360788224323187,
      "clip_ratio/high_mean": 0.0006247759920370299,
      "clip_ratio/low_mean": 0.0004566940942822839,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010814701090566814,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2604.0,
      "completions/mean_length": 814.8638916015625,
      "completions/mean_terminated_length": 566.710693359375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 5.186588921282799,
      "grad_norm": 0.21385346353054047,
      "learning_rate": 1e-06,
      "loss": -0.0092,
      "num_tokens": 319455800.0,
      "reward": 0.660714328289032,
      "reward_std": 0.14458850026130676,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 555
    },
    {
      "clip_ratio/high_max": 0.0016607689794909675,
      "clip_ratio/high_mean": 0.000572077973629348,
      "clip_ratio/low_mean": 0.0003538458436196379,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000925923821341712,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4039.0,
      "completions/mean_length": 697.9855346679688,
      "completions/mean_terminated_length": 547.4906616210938,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 5.1959183673469385,
      "grad_norm": 0.18615104258060455,
      "learning_rate": 1e-06,
      "loss": -0.015,
      "num_tokens": 320016659.0,
      "reward": 0.6930803656578064,
      "reward_std": 0.11419306695461273,
      "rewards/verify_math_reward/mean": 0.6930803656578064,
      "rewards/verify_math_reward/std": 0.46147337555885315,
      "step": 556
    },
    {
      "clip_ratio/high_max": 0.0016630271456961054,
      "clip_ratio/high_mean": 0.0005218762125878129,
      "clip_ratio/low_mean": 0.00037493973923119484,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008968159654614283,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4008.0,
      "completions/mean_length": 883.4297485351562,
      "completions/mean_terminated_length": 611.177978515625,
      "completions/min_length": 198.0,
      "completions/min_terminated_length": 198.0,
      "epoch": 5.205247813411079,
      "grad_norm": 0.1741432100534439,
      "learning_rate": 1e-06,
      "loss": -0.0216,
      "num_tokens": 320610460.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.14098487794399261,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644899368286,
      "step": 557
    },
    {
      "clip_ratio/high_max": 0.0018830596418411005,
      "clip_ratio/high_mean": 0.0006792893746023765,
      "clip_ratio/low_mean": 0.00043912097498832736,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011184103314008098,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2745.0,
      "completions/mean_length": 818.3594360351562,
      "completions/mean_terminated_length": 536.2836303710938,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 5.214577259475218,
      "grad_norm": 0.18685314059257507,
      "learning_rate": 1e-06,
      "loss": -0.0417,
      "num_tokens": 321148422.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.1511276513338089,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 558
    },
    {
      "clip_ratio/high_max": 0.002113575639668852,
      "clip_ratio/high_mean": 0.0007228144677355886,
      "clip_ratio/low_mean": 0.0007131807924452005,
      "clip_ratio/low_min": 1.3513513295038138e-05,
      "clip_ratio/region_mean": 0.0014359952438098844,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3617.0,
      "completions/mean_length": 825.2142944335938,
      "completions/mean_terminated_length": 548.029052734375,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 5.223906705539359,
      "grad_norm": 0.21293967962265015,
      "learning_rate": 1e-06,
      "loss": -0.022,
      "num_tokens": 321686406.0,
      "reward": 0.660714328289032,
      "reward_std": 0.17539508640766144,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 559
    },
    {
      "clip_ratio/high_max": 0.001891877464913705,
      "clip_ratio/high_mean": 0.0005734050190540074,
      "clip_ratio/low_mean": 0.00043106369594170246,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001004468711471418,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2605.0,
      "completions/mean_length": 864.3995971679688,
      "completions/mean_terminated_length": 543.2221069335938,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 5.233236151603498,
      "grad_norm": 0.1860976368188858,
      "learning_rate": 1e-06,
      "loss": -0.0182,
      "num_tokens": 322218852.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.14260178804397583,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200122833252,
      "step": 560
    },
    {
      "clip_ratio/high_max": 0.0017772664286894724,
      "clip_ratio/high_mean": 0.0006365105400618631,
      "clip_ratio/low_mean": 0.00041055485871765995,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010470654087839648,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3952.0,
      "completions/mean_length": 829.6317138671875,
      "completions/mean_terminated_length": 569.8964233398438,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 5.242565597667639,
      "grad_norm": 0.18890196084976196,
      "learning_rate": 1e-06,
      "loss": -0.025,
      "num_tokens": 322782994.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.14658337831497192,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 561
    },
    {
      "clip_ratio/high_max": 0.0021052542360848747,
      "clip_ratio/high_mean": 0.0008074982433754485,
      "clip_ratio/low_mean": 0.0006057855507606291,
      "clip_ratio/low_min": 1.793400224414654e-05,
      "clip_ratio/region_mean": 0.0014132837823126465,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2602.0,
      "completions/mean_length": 794.700927734375,
      "completions/mean_terminated_length": 545.0227661132812,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 5.251895043731778,
      "grad_norm": 0.2046925127506256,
      "learning_rate": 1e-06,
      "loss": -0.0069,
      "num_tokens": 323322318.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.15965674817562103,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 562
    },
    {
      "clip_ratio/high_max": 0.001974537288333522,
      "clip_ratio/high_mean": 0.0007292029949894641,
      "clip_ratio/low_mean": 0.0004600559491336753,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001189258968224749,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3237.0,
      "completions/mean_length": 850.5636596679688,
      "completions/mean_terminated_length": 558.3953857421875,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 5.261224489795918,
      "grad_norm": 0.2143038660287857,
      "learning_rate": 1e-06,
      "loss": -0.0708,
      "num_tokens": 323866591.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.1802852600812912,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 563
    },
    {
      "clip_ratio/high_max": 0.0020482320796872955,
      "clip_ratio/high_mean": 0.0007716327954767621,
      "clip_ratio/low_mean": 0.0005118950275573297,
      "clip_ratio/low_min": 3.246057531214319e-05,
      "clip_ratio/region_mean": 0.0012835278248530813,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3789.0,
      "completions/mean_length": 850.286865234375,
      "completions/mean_terminated_length": 604.8126831054688,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 5.270553935860058,
      "grad_norm": 0.28832581639289856,
      "learning_rate": 1e-06,
      "loss": -0.0352,
      "num_tokens": 324470048.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.17213143408298492,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 564
    },
    {
      "clip_ratio/high_max": 0.0017895134296850301,
      "clip_ratio/high_mean": 0.0006924274675839115,
      "clip_ratio/low_mean": 0.0006338041748676915,
      "clip_ratio/low_min": 1.4541647033183835e-05,
      "clip_ratio/region_mean": 0.0013262316351756454,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1643.0,
      "completions/mean_length": 833.3750610351562,
      "completions/mean_terminated_length": 530.9853515625,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 5.279883381924198,
      "grad_norm": 0.2057003527879715,
      "learning_rate": 1e-06,
      "loss": -0.0463,
      "num_tokens": 324996464.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.16330061852931976,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.49075525999069214,
      "step": 565
    },
    {
      "clip_ratio/high_max": 0.0017614452117413748,
      "clip_ratio/high_mean": 0.0005621731861538137,
      "clip_ratio/low_mean": 0.00044188695392222144,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010040601300715934,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3079.0,
      "completions/mean_length": 788.146240234375,
      "completions/mean_terminated_length": 533.6959228515625,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 5.289212827988338,
      "grad_norm": 0.19286105036735535,
      "learning_rate": 1e-06,
      "loss": -0.039,
      "num_tokens": 325526403.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.13505050539970398,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975656390190125,
      "step": 566
    },
    {
      "clip_ratio/high_max": 0.0020094738574698567,
      "clip_ratio/high_mean": 0.0007397602312266827,
      "clip_ratio/low_mean": 0.0005216166644004261,
      "clip_ratio/low_min": 3.568305510270875e-05,
      "clip_ratio/region_mean": 0.0012613769104063977,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4054.0,
      "completions/mean_length": 818.1864013671875,
      "completions/mean_terminated_length": 548.9939575195312,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 5.298542274052478,
      "grad_norm": 0.21803763508796692,
      "learning_rate": 1e-06,
      "loss": -0.0207,
      "num_tokens": 326066434.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.1800595372915268,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 567
    },
    {
      "clip_ratio/high_max": 0.002007713159400737,
      "clip_ratio/high_mean": 0.0006402884300769074,
      "clip_ratio/low_mean": 0.0006231778515939368,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012634662925847806,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2609.0,
      "completions/mean_length": 856.6763916015625,
      "completions/mean_terminated_length": 539.0955810546875,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 5.307871720116618,
      "grad_norm": 0.20887604355812073,
      "learning_rate": 1e-06,
      "loss": -0.0399,
      "num_tokens": 326593096.0,
      "reward": 0.609375,
      "reward_std": 0.14789538085460663,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 568
    },
    {
      "clip_ratio/high_max": 0.001692012679995969,
      "clip_ratio/high_mean": 0.0005945125139987795,
      "clip_ratio/low_mean": 0.0004166983571849414,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010112109048350248,
      "completions/clipped_ratio": 0.0580357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3603.0,
      "completions/mean_length": 770.3939819335938,
      "completions/mean_terminated_length": 565.4988403320312,
      "completions/min_length": 184.0,
      "completions/min_terminated_length": 184.0,
      "epoch": 5.317201166180758,
      "grad_norm": 0.2038104236125946,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 327162633.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.14004099369049072,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 569
    },
    {
      "clip_ratio/high_max": 0.0015329128054872854,
      "clip_ratio/high_mean": 0.0005875737606402254,
      "clip_ratio/low_mean": 0.0004885954831479467,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010761692301457515,
      "completions/clipped_ratio": 0.0725446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4006.0,
      "completions/mean_length": 817.1563110351562,
      "completions/mean_terminated_length": 560.6883544921875,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 5.326530612244898,
      "grad_norm": 0.19578103721141815,
      "learning_rate": 1e-06,
      "loss": -0.0171,
      "num_tokens": 327713533.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.14789608120918274,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.4907552897930145,
      "step": 570
    },
    {
      "clip_ratio/high_max": 0.0017812917576520704,
      "clip_ratio/high_mean": 0.0007292654245247832,
      "clip_ratio/low_mean": 0.0005467043170028774,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012759697419824079,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3161.0,
      "completions/mean_length": 817.0814819335938,
      "completions/mean_terminated_length": 552.0784301757812,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 5.335860058309038,
      "grad_norm": 0.317152202129364,
      "learning_rate": 1e-06,
      "loss": -0.0325,
      "num_tokens": 328253430.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.16134923696517944,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 571
    },
    {
      "clip_ratio/high_max": 0.0013462476381391753,
      "clip_ratio/high_mean": 0.0004718451064036344,
      "clip_ratio/low_mean": 0.0002711944046041026,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007430395289702574,
      "completions/clipped_ratio": 0.0546875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3398.0,
      "completions/mean_length": 737.3717041015625,
      "completions/mean_terminated_length": 543.07080078125,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 5.345189504373177,
      "grad_norm": 0.20440132915973663,
      "learning_rate": 1e-06,
      "loss": -0.0282,
      "num_tokens": 328806099.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.11280439049005508,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924396276473999,
      "step": 572
    },
    {
      "clip_ratio/high_max": 0.0013336496558622457,
      "clip_ratio/high_mean": 0.00046210142363634077,
      "clip_ratio/low_mean": 0.0005868909111086396,
      "clip_ratio/low_min": 2.3629489078302868e-05,
      "clip_ratio/region_mean": 0.0010489923406566959,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3004.0,
      "completions/mean_length": 726.6529541015625,
      "completions/mean_terminated_length": 527.5189208984375,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 5.354518950437318,
      "grad_norm": 0.21076786518096924,
      "learning_rate": 1e-06,
      "loss": -0.0164,
      "num_tokens": 329346956.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.13909989595413208,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 573
    },
    {
      "clip_ratio/high_max": 0.0020970545265299734,
      "clip_ratio/high_mean": 0.0008852681348798797,
      "clip_ratio/low_mean": 0.0005595254970103269,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014447936446231324,
      "completions/clipped_ratio": 0.056919642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3791.0,
      "completions/mean_length": 714.9006958007812,
      "completions/mean_terminated_length": 510.8343200683594,
      "completions/min_length": 181.0,
      "completions/min_terminated_length": 181.0,
      "epoch": 5.363848396501457,
      "grad_norm": 0.22876305878162384,
      "learning_rate": 1e-06,
      "loss": -0.0158,
      "num_tokens": 329869163.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.16468819975852966,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.4884119927883148,
      "step": 574
    },
    {
      "clip_ratio/high_max": 0.0015995872017811053,
      "clip_ratio/high_mean": 0.0005446128234325442,
      "clip_ratio/low_mean": 0.0004184743111181888,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009630871209083125,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3948.0,
      "completions/mean_length": 825.6752319335938,
      "completions/mean_terminated_length": 607.653564453125,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 5.373177842565598,
      "grad_norm": 0.20254768431186676,
      "learning_rate": 1e-06,
      "loss": -0.0189,
      "num_tokens": 330472072.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.1584189236164093,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 575
    },
    {
      "clip_ratio/high_max": 0.0018277046874572989,
      "clip_ratio/high_mean": 0.0006004279775879695,
      "clip_ratio/low_mean": 0.0003013545033354603,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000901782475921209,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4060.0,
      "completions/mean_length": 817.8772583007812,
      "completions/mean_terminated_length": 557.2072143554688,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 5.382507288629737,
      "grad_norm": 0.18974773585796356,
      "learning_rate": 1e-06,
      "loss": -0.0264,
      "num_tokens": 331031098.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.1442061960697174,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 576
    },
    {
      "clip_ratio/high_max": 0.0011318937667965656,
      "clip_ratio/high_mean": 0.00037870730284339515,
      "clip_ratio/low_mean": 0.0002643170196279243,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006430243211070774,
      "completions/clipped_ratio": 0.0457589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4001.0,
      "completions/mean_length": 743.2767944335938,
      "completions/mean_terminated_length": 582.5029296875,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 5.391836734693878,
      "grad_norm": 0.16180872917175293,
      "learning_rate": 1e-06,
      "loss": -0.0083,
      "num_tokens": 331615714.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.09574238210916519,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422144770622253,
      "step": 577
    },
    {
      "clip_ratio/high_max": 0.00139545380443451,
      "clip_ratio/high_mean": 0.00044874311970488634,
      "clip_ratio/low_mean": 0.00036537688993121265,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008141200014506467,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4076.0,
      "completions/mean_length": 854.4877319335938,
      "completions/mean_terminated_length": 558.3690795898438,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 5.401166180758017,
      "grad_norm": 0.15557047724723816,
      "learning_rate": 1e-06,
      "loss": -0.0339,
      "num_tokens": 332169759.0,
      "reward": 0.65625,
      "reward_std": 0.11712227016687393,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 578
    },
    {
      "clip_ratio/high_max": 0.0019078557161265053,
      "clip_ratio/high_mean": 0.000699598141181923,
      "clip_ratio/low_mean": 0.00045237674294185126,
      "clip_ratio/low_min": 1.0903698239417281e-05,
      "clip_ratio/region_mean": 0.0011519748768478166,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2283.0,
      "completions/mean_length": 899.5167846679688,
      "completions/mean_terminated_length": 564.4968872070312,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 5.410495626822158,
      "grad_norm": 0.1839502453804016,
      "learning_rate": 1e-06,
      "loss": -0.0196,
      "num_tokens": 332721606.0,
      "reward": 0.598214328289032,
      "reward_std": 0.13996821641921997,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 579
    },
    {
      "clip_ratio/high_max": 0.001396314461089787,
      "clip_ratio/high_mean": 0.000474154721814557,
      "clip_ratio/low_mean": 0.0004216043334963615,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008957590653153602,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3805.0,
      "completions/mean_length": 793.8705444335938,
      "completions/mean_terminated_length": 565.3222045898438,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 5.419825072886297,
      "grad_norm": 0.5899951457977295,
      "learning_rate": 1e-06,
      "loss": -0.0272,
      "num_tokens": 333281082.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.13984736800193787,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 580
    },
    {
      "clip_ratio/high_max": 0.001612413179827854,
      "clip_ratio/high_mean": 0.0005793492891825736,
      "clip_ratio/low_mean": 0.0004715553150163032,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001050904600560898,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4060.0,
      "completions/mean_length": 823.3314819335938,
      "completions/mean_terminated_length": 528.711669921875,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 5.429154518950437,
      "grad_norm": 0.19468367099761963,
      "learning_rate": 1e-06,
      "loss": -0.0269,
      "num_tokens": 333801907.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.12302455306053162,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111123085022,
      "step": 581
    },
    {
      "clip_ratio/high_max": 0.002196735735196853,
      "clip_ratio/high_mean": 0.0006915792146173771,
      "clip_ratio/low_mean": 0.00044164179780636914,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011332209760439582,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2405.0,
      "completions/mean_length": 785.1808471679688,
      "completions/mean_terminated_length": 543.3125610351562,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 5.438483965014577,
      "grad_norm": 0.19301320612430573,
      "learning_rate": 1e-06,
      "loss": -0.019,
      "num_tokens": 334337101.0,
      "reward": 0.668526828289032,
      "reward_std": 0.15495768189430237,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056483745575,
      "step": 582
    },
    {
      "clip_ratio/high_max": 0.00198226571228588,
      "clip_ratio/high_mean": 0.0006953595129743917,
      "clip_ratio/low_mean": 0.0003817330175479583,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001077092510968214,
      "completions/clipped_ratio": 0.0591517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3544.0,
      "completions/mean_length": 739.5223388671875,
      "completions/mean_terminated_length": 528.4982299804688,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 5.447813411078717,
      "grad_norm": 0.20036178827285767,
      "learning_rate": 1e-06,
      "loss": -0.0373,
      "num_tokens": 334865513.0,
      "reward": 0.668526828289032,
      "reward_std": 0.14996904134750366,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056483745575,
      "step": 583
    },
    {
      "clip_ratio/high_max": 0.0017318636273557786,
      "clip_ratio/high_mean": 0.000776032597059384,
      "clip_ratio/low_mean": 0.0004366377875157923,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001212670365930535,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2489.0,
      "completions/mean_length": 828.739990234375,
      "completions/mean_terminated_length": 538.9343872070312,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 5.457142857142857,
      "grad_norm": 0.2247992902994156,
      "learning_rate": 1e-06,
      "loss": -0.0591,
      "num_tokens": 335393696.0,
      "reward": 0.598214328289032,
      "reward_std": 0.16750863194465637,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 584
    },
    {
      "clip_ratio/high_max": 0.0017781036040105391,
      "clip_ratio/high_mean": 0.0006188633406054578,
      "clip_ratio/low_mean": 0.0004684656846620783,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010873290157178417,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3043.0,
      "completions/mean_length": 847.3873291015625,
      "completions/mean_terminated_length": 563.5278930664062,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 5.466472303206997,
      "grad_norm": 0.27033933997154236,
      "learning_rate": 1e-06,
      "loss": -0.0222,
      "num_tokens": 335955011.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.14027202129364014,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 585
    },
    {
      "clip_ratio/high_max": 0.0020864395082753617,
      "clip_ratio/high_mean": 0.0009011084002850112,
      "clip_ratio/low_mean": 0.0005780601268270402,
      "clip_ratio/low_min": 1.5375153452623636e-05,
      "clip_ratio/region_mean": 0.0014791684989177156,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3285.0,
      "completions/mean_length": 784.7332763671875,
      "completions/mean_terminated_length": 534.3013305664062,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 5.475801749271137,
      "grad_norm": 0.23711398243904114,
      "learning_rate": 1e-06,
      "loss": -0.0469,
      "num_tokens": 336487820.0,
      "reward": 0.652901828289032,
      "reward_std": 0.17581765353679657,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631320357322693,
      "step": 586
    },
    {
      "clip_ratio/high_max": 0.001547319370729383,
      "clip_ratio/high_mean": 0.0004892838705927716,
      "clip_ratio/low_mean": 0.00044836341567133786,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000937647293540067,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3966.0,
      "completions/mean_length": 845.9564819335938,
      "completions/mean_terminated_length": 604.3465576171875,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 5.485131195335277,
      "grad_norm": 0.4471912980079651,
      "learning_rate": 1e-06,
      "loss": -0.0115,
      "num_tokens": 337090461.0,
      "reward": 0.578125,
      "reward_std": 0.1476692408323288,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 587
    },
    {
      "clip_ratio/high_max": 0.0018884771052398719,
      "clip_ratio/high_mean": 0.0006595084778382443,
      "clip_ratio/low_mean": 0.0005453445792227285,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012048530406900682,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3647.0,
      "completions/mean_length": 904.1495971679688,
      "completions/mean_terminated_length": 586.9226684570312,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 5.494460641399417,
      "grad_norm": 0.2929361164569855,
      "learning_rate": 1e-06,
      "loss": -0.0177,
      "num_tokens": 337666475.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.16694730520248413,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 588
    },
    {
      "clip_ratio/high_max": 0.002092951541271759,
      "clip_ratio/high_mean": 0.000775908210016496,
      "clip_ratio/low_mean": 0.0004078817446497851,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011837899546662811,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1752.0,
      "completions/mean_length": 783.6585083007812,
      "completions/mean_terminated_length": 550.1719970703125,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 5.503790087463557,
      "grad_norm": 0.2266514003276825,
      "learning_rate": 1e-06,
      "loss": -0.0338,
      "num_tokens": 338212625.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.14951369166374207,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179922461509705,
      "step": 589
    },
    {
      "clip_ratio/high_max": 0.001805393349059159,
      "clip_ratio/high_mean": 0.0005829860820085742,
      "clip_ratio/low_mean": 0.00044519790480990196,
      "clip_ratio/low_min": 1.721763146633748e-05,
      "clip_ratio/region_mean": 0.0010281839713570662,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4032.0,
      "completions/mean_length": 754.6797485351562,
      "completions/mean_terminated_length": 523.4188842773438,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 5.513119533527696,
      "grad_norm": 0.19441890716552734,
      "learning_rate": 1e-06,
      "loss": -0.0329,
      "num_tokens": 338734970.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.13598664104938507,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975659370422363,
      "step": 590
    },
    {
      "clip_ratio/high_max": 0.0019316535326652229,
      "clip_ratio/high_mean": 0.0005992088181301369,
      "clip_ratio/low_mean": 0.0006648907292401418,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012640995591937099,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3237.0,
      "completions/mean_length": 899.7567138671875,
      "completions/mean_terminated_length": 564.7620239257812,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 5.522448979591837,
      "grad_norm": 0.2167060673236847,
      "learning_rate": 1e-06,
      "loss": -0.0185,
      "num_tokens": 339289072.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.15026073157787323,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 591
    },
    {
      "clip_ratio/high_max": 0.0017859246545413043,
      "clip_ratio/high_mean": 0.000552656611034763,
      "clip_ratio/low_mean": 0.00042353591743449215,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009761925284692552,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3097.0,
      "completions/mean_length": 781.654052734375,
      "completions/mean_terminated_length": 552.2601928710938,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 5.531778425655976,
      "grad_norm": 0.1835176795721054,
      "learning_rate": 1e-06,
      "loss": -0.0037,
      "num_tokens": 339853098.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.14530275762081146,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 592
    },
    {
      "clip_ratio/high_max": 0.001817769731133012,
      "clip_ratio/high_mean": 0.0006622924884140957,
      "clip_ratio/low_mean": 0.0006602996272704331,
      "clip_ratio/low_min": 1.0310128345736302e-05,
      "clip_ratio/region_mean": 0.0013225921247794759,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3932.0,
      "completions/mean_length": 812.091552734375,
      "completions/mean_terminated_length": 546.6851806640625,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 5.541107871720117,
      "grad_norm": 0.22387462854385376,
      "learning_rate": 1e-06,
      "loss": -0.019,
      "num_tokens": 340402620.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.14617151021957397,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 593
    },
    {
      "clip_ratio/high_max": 0.0020483315347519238,
      "clip_ratio/high_mean": 0.000892381542143994,
      "clip_ratio/low_mean": 0.0006209309067344293,
      "clip_ratio/low_min": 1.291589160246076e-05,
      "clip_ratio/region_mean": 0.001513312501629116,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3277.0,
      "completions/mean_length": 921.42529296875,
      "completions/mean_terminated_length": 601.6277465820312,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 5.550437317784256,
      "grad_norm": 0.20755481719970703,
      "learning_rate": 1e-06,
      "loss": -0.0493,
      "num_tokens": 340976921.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.17430990934371948,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 594
    },
    {
      "clip_ratio/high_max": 0.0022864980783197097,
      "clip_ratio/high_mean": 0.0007460992619598983,
      "clip_ratio/low_mean": 0.0007244940734381089,
      "clip_ratio/low_min": 9.901774319587275e-06,
      "clip_ratio/region_mean": 0.0014705933426739648,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4080.0,
      "completions/mean_length": 825.6038208007812,
      "completions/mean_terminated_length": 574.0348510742188,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 5.559766763848397,
      "grad_norm": 0.27528148889541626,
      "learning_rate": 1e-06,
      "loss": -0.0221,
      "num_tokens": 341561558.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.1669868528842926,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 595
    },
    {
      "clip_ratio/high_max": 0.00207165781466756,
      "clip_ratio/high_mean": 0.0009048133542819414,
      "clip_ratio/low_mean": 0.0005004548697797873,
      "clip_ratio/low_min": 1.8105445633409545e-05,
      "clip_ratio/region_mean": 0.0014052681763132568,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3538.0,
      "completions/mean_length": 840.622802734375,
      "completions/mean_terminated_length": 547.5596313476562,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 5.569096209912536,
      "grad_norm": 0.2678869962692261,
      "learning_rate": 1e-06,
      "loss": -0.0347,
      "num_tokens": 342087452.0,
      "reward": 0.6328125,
      "reward_std": 0.17893162369728088,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 596
    },
    {
      "clip_ratio/high_max": 0.001691892248345539,
      "clip_ratio/high_mean": 0.0005671769349646638,
      "clip_ratio/low_mean": 0.00044722312645717466,
      "clip_ratio/low_min": 1.7375590687151998e-05,
      "clip_ratio/region_mean": 0.0010144000789296115,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3886.0,
      "completions/mean_length": 720.739990234375,
      "completions/mean_terminated_length": 550.592041015625,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 5.578425655976677,
      "grad_norm": 0.21810497343540192,
      "learning_rate": 1e-06,
      "loss": -0.0115,
      "num_tokens": 342634691.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.13035649061203003,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 597
    },
    {
      "clip_ratio/high_max": 0.0018793922354234383,
      "clip_ratio/high_mean": 0.0006814006374042947,
      "clip_ratio/low_mean": 0.0004309818004912813,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011123824297101237,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3201.0,
      "completions/mean_length": 857.7578735351562,
      "completions/mean_terminated_length": 566.2372436523438,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 5.587755102040816,
      "grad_norm": 0.2967858910560608,
      "learning_rate": 1e-06,
      "loss": -0.035,
      "num_tokens": 343184650.0,
      "reward": 0.606026828289032,
      "reward_std": 0.1594373881816864,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 598
    },
    {
      "clip_ratio/high_max": 0.0017086845764424652,
      "clip_ratio/high_mean": 0.000784298299549846,
      "clip_ratio/low_mean": 0.0006106574583100155,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013949557906016707,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3116.0,
      "completions/mean_length": 908.1138916015625,
      "completions/mean_terminated_length": 552.1464233398438,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 5.597084548104956,
      "grad_norm": 0.22466321289539337,
      "learning_rate": 1e-06,
      "loss": -0.0288,
      "num_tokens": 343721272.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.17878004908561707,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 599
    },
    {
      "clip_ratio/high_max": 0.0017492636834504083,
      "clip_ratio/high_mean": 0.0007136720123526175,
      "clip_ratio/low_mean": 0.0005256916329017258,
      "clip_ratio/low_min": 1.220703143189894e-05,
      "clip_ratio/region_mean": 0.0012393636534397956,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4096.0,
      "completions/mean_length": 899.51904296875,
      "completions/mean_terminated_length": 594.7200927734375,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 5.606413994169096,
      "grad_norm": 0.18881264328956604,
      "learning_rate": 1e-06,
      "loss": -0.0613,
      "num_tokens": 344295065.0,
      "reward": 0.613839328289032,
      "reward_std": 0.1774245798587799,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 600
    },
    {
      "clip_ratio/high_max": 0.001968203840078786,
      "clip_ratio/high_mean": 0.0008214008994400501,
      "clip_ratio/low_mean": 0.0005234422815192374,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001344843167316867,
      "completions/clipped_ratio": 0.0591517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3169.0,
      "completions/mean_length": 763.0636596679688,
      "completions/mean_terminated_length": 553.5195922851562,
      "completions/min_length": 191.0,
      "completions/min_terminated_length": 191.0,
      "epoch": 5.615743440233236,
      "grad_norm": 0.22382906079292297,
      "learning_rate": 1e-06,
      "loss": -0.0056,
      "num_tokens": 344850122.0,
      "reward": 0.668526828289032,
      "reward_std": 0.1704043447971344,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056483745575,
      "step": 601
    },
    {
      "clip_ratio/high_max": 0.0020664919356931932,
      "clip_ratio/high_mean": 0.0007152585403673584,
      "clip_ratio/low_mean": 0.0005268582699500257,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012421168466971722,
      "completions/clipped_ratio": 0.0546875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3786.0,
      "completions/mean_length": 748.1529541015625,
      "completions/mean_terminated_length": 554.4757690429688,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 5.625072886297376,
      "grad_norm": 0.21465174853801727,
      "learning_rate": 1e-06,
      "loss": -0.0255,
      "num_tokens": 345399363.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.16683463752269745,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 602
    },
    {
      "clip_ratio/high_max": 0.001694723789114505,
      "clip_ratio/high_mean": 0.0006381245766533539,
      "clip_ratio/low_mean": 0.0003428182244533673,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009809427901927847,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2273.0,
      "completions/mean_length": 797.8717041015625,
      "completions/mean_terminated_length": 548.433349609375,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 5.634402332361516,
      "grad_norm": 0.23483262956142426,
      "learning_rate": 1e-06,
      "loss": -0.0313,
      "num_tokens": 345950984.0,
      "reward": 0.7042410969734192,
      "reward_std": 0.13936907052993774,
      "rewards/verify_math_reward/mean": 0.7042410969734192,
      "rewards/verify_math_reward/std": 0.45663803815841675,
      "step": 603
    },
    {
      "clip_ratio/high_max": 0.0015487017008126713,
      "clip_ratio/high_mean": 0.0007026943740129354,
      "clip_ratio/low_mean": 0.0004674681936194247,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011701625771820545,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3460.0,
      "completions/mean_length": 799.7188110351562,
      "completions/mean_terminated_length": 567.3643798828125,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 5.643731778425656,
      "grad_norm": 0.21795716881752014,
      "learning_rate": 1e-06,
      "loss": -0.0373,
      "num_tokens": 346512308.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.16566641628742218,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 604
    },
    {
      "clip_ratio/high_max": 0.0023101557744666934,
      "clip_ratio/high_mean": 0.0008812618871161249,
      "clip_ratio/low_mean": 0.0005640503950417042,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014453122712438926,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3197.0,
      "completions/mean_length": 843.5011596679688,
      "completions/mean_terminated_length": 567.8656616210938,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 5.653061224489796,
      "grad_norm": 0.2422908991575241,
      "learning_rate": 1e-06,
      "loss": -0.0452,
      "num_tokens": 347070853.0,
      "reward": 0.637276828289032,
      "reward_std": 0.19467879831790924,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 605
    },
    {
      "clip_ratio/high_max": 0.0016240206314250827,
      "clip_ratio/high_mean": 0.0006713330221828073,
      "clip_ratio/low_mean": 0.0004585997867252445,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001129932796175126,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2902.0,
      "completions/mean_length": 791.4799194335938,
      "completions/mean_terminated_length": 545.8201293945312,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 5.662390670553936,
      "grad_norm": 0.1954859495162964,
      "learning_rate": 1e-06,
      "loss": -0.0178,
      "num_tokens": 347613251.0,
      "reward": 0.6640625,
      "reward_std": 0.15631458163261414,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 606
    },
    {
      "clip_ratio/high_max": 0.0017178686139232013,
      "clip_ratio/high_mean": 0.0005973039187665563,
      "clip_ratio/low_mean": 0.0004332460266596172,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010305499399692053,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3724.0,
      "completions/mean_length": 808.7645263671875,
      "completions/mean_terminated_length": 547.369873046875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 5.671720116618076,
      "grad_norm": 0.18111705780029297,
      "learning_rate": 1e-06,
      "loss": -0.0263,
      "num_tokens": 348154360.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.126701220870018,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422144770622253,
      "step": 607
    },
    {
      "clip_ratio/high_max": 0.0017496866712463088,
      "clip_ratio/high_mean": 0.0007021832898317371,
      "clip_ratio/low_mean": 0.00041005166713148355,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011122349460492842,
      "completions/clipped_ratio": 0.0513392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3501.0,
      "completions/mean_length": 704.2355346679688,
      "completions/mean_terminated_length": 520.68115234375,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 5.681049562682215,
      "grad_norm": 0.21597105264663696,
      "learning_rate": 1e-06,
      "loss": -0.0229,
      "num_tokens": 348691003.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.14282116293907166,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.4628615975379944,
      "step": 608
    },
    {
      "clip_ratio/high_max": 0.0018038047419395298,
      "clip_ratio/high_mean": 0.0006084399683459196,
      "clip_ratio/low_mean": 0.0006828202385804616,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001291260192374466,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2171.0,
      "completions/mean_length": 859.4699096679688,
      "completions/mean_terminated_length": 563.8063354492188,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 5.690379008746356,
      "grad_norm": 0.1902514398097992,
      "learning_rate": 1e-06,
      "loss": -0.0257,
      "num_tokens": 349243272.0,
      "reward": 0.59375,
      "reward_std": 0.15965536236763,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 609
    },
    {
      "clip_ratio/high_max": 0.00212705120065948,
      "clip_ratio/high_mean": 0.0007869412202126114,
      "clip_ratio/low_mean": 0.0005006231767765712,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012875644024461508,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3585.0,
      "completions/mean_length": 729.0658569335938,
      "completions/mean_terminated_length": 508.87396240234375,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 5.699708454810495,
      "grad_norm": 0.21717499196529388,
      "learning_rate": 1e-06,
      "loss": -0.0257,
      "num_tokens": 349762723.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.1497408002614975,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.4600566029548645,
      "step": 610
    },
    {
      "clip_ratio/high_max": 0.0014468907320406288,
      "clip_ratio/high_mean": 0.0005383249972510384,
      "clip_ratio/low_mean": 0.00037257574604154797,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009109007332881447,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3554.0,
      "completions/mean_length": 809.4074096679688,
      "completions/mean_terminated_length": 548.0638427734375,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 5.709037900874636,
      "grad_norm": 0.16854842007160187,
      "learning_rate": 1e-06,
      "loss": -0.0358,
      "num_tokens": 350313016.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.14233079552650452,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 611
    },
    {
      "clip_ratio/high_max": 0.0016304294695146382,
      "clip_ratio/high_mean": 0.0006607993400393752,
      "clip_ratio/low_mean": 0.0004933656573484768,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011541649982973468,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3801.0,
      "completions/mean_length": 855.9598388671875,
      "completions/mean_terminated_length": 568.5686645507812,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 5.718367346938775,
      "grad_norm": 0.21609733998775482,
      "learning_rate": 1e-06,
      "loss": -0.0412,
      "num_tokens": 350880596.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.17258423566818237,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 612
    },
    {
      "clip_ratio/high_max": 0.0017601579238544218,
      "clip_ratio/high_mean": 0.0005464939931698609,
      "clip_ratio/low_mean": 0.0002638160899550712,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008103100808511954,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3793.0,
      "completions/mean_length": 862.3627319335938,
      "completions/mean_terminated_length": 536.615478515625,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 5.727696793002916,
      "grad_norm": 0.2475437968969345,
      "learning_rate": 1e-06,
      "loss": -0.0295,
      "num_tokens": 351407057.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.111860491335392,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 613
    },
    {
      "clip_ratio/high_max": 0.001923630596138537,
      "clip_ratio/high_mean": 0.000620996192083112,
      "clip_ratio/low_mean": 0.0006291031231739908,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012500993070716504,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3904.0,
      "completions/mean_length": 766.2288208007812,
      "completions/mean_terminated_length": 535.767333984375,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 5.737026239067055,
      "grad_norm": 0.19469042122364044,
      "learning_rate": 1e-06,
      "loss": -0.0206,
      "num_tokens": 351949766.0,
      "reward": 0.6171875,
      "reward_std": 0.13711459934711456,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 614
    },
    {
      "clip_ratio/high_max": 0.0018321072893741075,
      "clip_ratio/high_mean": 0.0006474209503721795,
      "clip_ratio/low_mean": 0.0004664251600843272,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011138460904476233,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3790.0,
      "completions/mean_length": 779.232177734375,
      "completions/mean_terminated_length": 541.1865844726562,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 5.746355685131196,
      "grad_norm": 0.2441077083349228,
      "learning_rate": 1e-06,
      "loss": -0.0333,
      "num_tokens": 352490694.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.13857996463775635,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791128396987915,
      "step": 615
    },
    {
      "clip_ratio/high_max": 0.0018733175711531658,
      "clip_ratio/high_mean": 0.00071459826631326,
      "clip_ratio/low_mean": 0.00040857206249711453,
      "clip_ratio/low_min": 1.9379844161449e-05,
      "clip_ratio/region_mean": 0.001123170310165733,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3101.0,
      "completions/mean_length": 830.3772583007812,
      "completions/mean_terminated_length": 527.7097778320312,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 5.755685131195335,
      "grad_norm": 0.2149052768945694,
      "learning_rate": 1e-06,
      "loss": -0.0365,
      "num_tokens": 353016248.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.15935185551643372,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 616
    },
    {
      "clip_ratio/high_max": 0.002151643970137229,
      "clip_ratio/high_mean": 0.0008402461171499453,
      "clip_ratio/low_mean": 0.0005802615378343035,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014205076768121216,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2003.0,
      "completions/mean_length": 798.3672485351562,
      "completions/mean_terminated_length": 536.1458129882812,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 5.765014577259475,
      "grad_norm": 0.3056844174861908,
      "learning_rate": 1e-06,
      "loss": -0.0375,
      "num_tokens": 353547585.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.16653333604335785,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 617
    },
    {
      "clip_ratio/high_max": 0.001931530590809416,
      "clip_ratio/high_mean": 0.0007804630440659821,
      "clip_ratio/low_mean": 0.0004457067543626181,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012261698029760737,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3951.0,
      "completions/mean_length": 836.1741333007812,
      "completions/mean_terminated_length": 576.9590454101562,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 5.774344023323615,
      "grad_norm": 0.4081643521785736,
      "learning_rate": 1e-06,
      "loss": -0.0382,
      "num_tokens": 354110749.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.1755470633506775,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 618
    },
    {
      "clip_ratio/high_max": 0.0021630388000630774,
      "clip_ratio/high_mean": 0.0008867753840604564,
      "clip_ratio/low_mean": 0.0004932618467137218,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013800371889374219,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3079.0,
      "completions/mean_length": 838.5480346679688,
      "completions/mean_terminated_length": 540.9732055664062,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 5.783673469387755,
      "grad_norm": 0.25812169909477234,
      "learning_rate": 1e-06,
      "loss": -0.0163,
      "num_tokens": 354644392.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.1750592142343521,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 619
    },
    {
      "clip_ratio/high_max": 0.0016750669856264722,
      "clip_ratio/high_mean": 0.0006418176044462598,
      "clip_ratio/low_mean": 0.00045677239268115954,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010985900153173134,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2526.0,
      "completions/mean_length": 752.2076416015625,
      "completions/mean_terminated_length": 512.2224731445312,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 5.793002915451895,
      "grad_norm": 0.23503591120243073,
      "learning_rate": 1e-06,
      "loss": -0.0442,
      "num_tokens": 355169490.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.147894948720932,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 620
    },
    {
      "clip_ratio/high_max": 0.0017263800364162307,
      "clip_ratio/high_mean": 0.000655078043564572,
      "clip_ratio/low_mean": 0.0004387635049170058,
      "clip_ratio/low_min": 2.0868114006589167e-05,
      "clip_ratio/region_mean": 0.001093841539841378,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4073.0,
      "completions/mean_length": 802.0569458007812,
      "completions/mean_terminated_length": 569.8673706054688,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 5.802332361516035,
      "grad_norm": 0.21068450808525085,
      "learning_rate": 1e-06,
      "loss": -0.0353,
      "num_tokens": 355733549.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.1429726928472519,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 621
    },
    {
      "clip_ratio/high_max": 0.002015686441154685,
      "clip_ratio/high_mean": 0.0008106550358206732,
      "clip_ratio/low_mean": 0.0005287678868626244,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013394228735705838,
      "completions/clipped_ratio": 0.0591517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3426.0,
      "completions/mean_length": 768.6082763671875,
      "completions/mean_terminated_length": 559.412841796875,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 5.811661807580175,
      "grad_norm": 0.2241523116827011,
      "learning_rate": 1e-06,
      "loss": -0.0281,
      "num_tokens": 356289342.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.178443044424057,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 622
    },
    {
      "clip_ratio/high_max": 0.0019235800136812031,
      "clip_ratio/high_mean": 0.000648291599645745,
      "clip_ratio/low_mean": 0.0004592416796640464,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001107533294998575,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2706.0,
      "completions/mean_length": 912.388427734375,
      "completions/mean_terminated_length": 600.2696533203125,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 5.820991253644315,
      "grad_norm": 0.19351865351200104,
      "learning_rate": 1e-06,
      "loss": -0.0436,
      "num_tokens": 356879562.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.15221214294433594,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 623
    },
    {
      "clip_ratio/high_max": 0.0015534516278421506,
      "clip_ratio/high_mean": 0.0005410439616753138,
      "clip_ratio/low_mean": 0.0003709982795498945,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009120422218984459,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3713.0,
      "completions/mean_length": 906.0636596679688,
      "completions/mean_terminated_length": 601.8887939453125,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 5.830320699708455,
      "grad_norm": 0.19623930752277374,
      "learning_rate": 1e-06,
      "loss": -0.0474,
      "num_tokens": 357473683.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.1352352499961853,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 624
    },
    {
      "clip_ratio/high_max": 0.0018489718095224816,
      "clip_ratio/high_mean": 0.0007639082032255828,
      "clip_ratio/low_mean": 0.00044334438871374005,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012072525569237769,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3959.0,
      "completions/mean_length": 826.9285888671875,
      "completions/mean_terminated_length": 532.6326293945312,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 5.839650145772595,
      "grad_norm": 0.19132301211357117,
      "learning_rate": 1e-06,
      "loss": -0.0527,
      "num_tokens": 357991139.0,
      "reward": 0.7042410969734192,
      "reward_std": 0.1562379002571106,
      "rewards/verify_math_reward/mean": 0.7042410969734192,
      "rewards/verify_math_reward/std": 0.45663803815841675,
      "step": 625
    },
    {
      "clip_ratio/high_max": 0.001652845643548062,
      "clip_ratio/high_mean": 0.0005679082350980025,
      "clip_ratio/low_mean": 0.00034982249826498446,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009177307219943032,
      "completions/clipped_ratio": 0.0513392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3818.0,
      "completions/mean_length": 694.943115234375,
      "completions/mean_terminated_length": 510.8858642578125,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 5.848979591836734,
      "grad_norm": 0.1636369377374649,
      "learning_rate": 1e-06,
      "loss": -0.0049,
      "num_tokens": 358505368.0,
      "reward": 0.6640625,
      "reward_std": 0.11404222249984741,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 626
    },
    {
      "clip_ratio/high_max": 0.001495146207162179,
      "clip_ratio/high_mean": 0.0005264190381240041,
      "clip_ratio/low_mean": 0.0005458227988128783,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010722418483055662,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3368.0,
      "completions/mean_length": 749.0803833007812,
      "completions/mean_terminated_length": 551.2718505859375,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 5.858309037900875,
      "grad_norm": 0.2107505351305008,
      "learning_rate": 1e-06,
      "loss": -0.0212,
      "num_tokens": 359066704.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.13665854930877686,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 627
    },
    {
      "clip_ratio/high_max": 0.0023203105356515152,
      "clip_ratio/high_mean": 0.0006591551582459942,
      "clip_ratio/low_mean": 0.0005891631481063087,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012483183018048294,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3372.0,
      "completions/mean_length": 810.544677734375,
      "completions/mean_terminated_length": 570.5293579101562,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 5.867638483965014,
      "grad_norm": 0.21155844628810883,
      "learning_rate": 1e-06,
      "loss": -0.034,
      "num_tokens": 359623400.0,
      "reward": 0.640625,
      "reward_std": 0.147451713681221,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 628
    },
    {
      "clip_ratio/high_max": 0.001622506413696101,
      "clip_ratio/high_mean": 0.0005611461847365717,
      "clip_ratio/low_mean": 0.00047974971175790415,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001040895891492255,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3044.0,
      "completions/mean_length": 802.2098388671875,
      "completions/mean_terminated_length": 540.2940063476562,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 5.876967930029155,
      "grad_norm": 0.20598623156547546,
      "learning_rate": 1e-06,
      "loss": -0.0319,
      "num_tokens": 360169964.0,
      "reward": 0.645089328289032,
      "reward_std": 0.13534514605998993,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 629
    },
    {
      "clip_ratio/high_max": 0.002303055392985698,
      "clip_ratio/high_mean": 0.0008456104351353133,
      "clip_ratio/low_mean": 0.0005595912602984754,
      "clip_ratio/low_min": 2.916013909270987e-05,
      "clip_ratio/region_mean": 0.0014052016849745996,
      "completions/clipped_ratio": 0.0580357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2753.0,
      "completions/mean_length": 754.3928833007812,
      "completions/mean_terminated_length": 548.5118408203125,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 5.886297376093294,
      "grad_norm": 0.2988147735595703,
      "learning_rate": 1e-06,
      "loss": -0.0141,
      "num_tokens": 360724940.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.17063213884830475,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 630
    },
    {
      "clip_ratio/high_max": 0.0016552548222534824,
      "clip_ratio/high_mean": 0.0004942677851431654,
      "clip_ratio/low_mean": 0.0004892132105851488,
      "clip_ratio/low_min": 4.0523016650695354e-05,
      "clip_ratio/region_mean": 0.0009834810116444714,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4026.0,
      "completions/mean_length": 763.2053833007812,
      "completions/mean_terminated_length": 545.2461547851562,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 5.895626822157435,
      "grad_norm": 0.1725149303674698,
      "learning_rate": 1e-06,
      "loss": -0.0255,
      "num_tokens": 361271180.0,
      "reward": 0.6328125,
      "reward_std": 0.13226650655269623,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 631
    },
    {
      "clip_ratio/high_max": 0.0024809171445667744,
      "clip_ratio/high_mean": 0.0009537415789964143,
      "clip_ratio/low_mean": 0.0006145953857412678,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0015683369456382934,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4064.0,
      "completions/mean_length": 890.0100708007812,
      "completions/mean_terminated_length": 567.0479125976562,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 5.904956268221574,
      "grad_norm": 0.24993817508220673,
      "learning_rate": 1e-06,
      "loss": -0.0498,
      "num_tokens": 361829709.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.18614406883716583,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.4907552897930145,
      "step": 632
    },
    {
      "clip_ratio/high_max": 0.001738108621793799,
      "clip_ratio/high_mean": 0.0007113383526302641,
      "clip_ratio/low_mean": 0.00046371721600735327,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011750555713661015,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3509.0,
      "completions/mean_length": 714.6016235351562,
      "completions/mean_terminated_length": 544.1441650390625,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 5.914285714285715,
      "grad_norm": 0.20464277267456055,
      "learning_rate": 1e-06,
      "loss": -0.0254,
      "num_tokens": 362374872.0,
      "reward": 0.660714328289032,
      "reward_std": 0.15033671259880066,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 633
    },
    {
      "clip_ratio/high_max": 0.0019651527181849815,
      "clip_ratio/high_mean": 0.0007409344379993854,
      "clip_ratio/low_mean": 0.0006084804099373287,
      "clip_ratio/low_min": 3.630906212492846e-05,
      "clip_ratio/region_mean": 0.0013494148734025657,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3855.0,
      "completions/mean_length": 790.779052734375,
      "completions/mean_terminated_length": 557.7944946289062,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 5.923615160349854,
      "grad_norm": 0.24859444797039032,
      "learning_rate": 1e-06,
      "loss": -0.0346,
      "num_tokens": 362933986.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.16792160272598267,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 634
    },
    {
      "clip_ratio/high_max": 0.002345296394196339,
      "clip_ratio/high_mean": 0.0009953158478310797,
      "clip_ratio/low_mean": 0.0004788298765561194,
      "clip_ratio/low_min": 1.7615557226235978e-05,
      "clip_ratio/region_mean": 0.0014741457271156833,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3930.0,
      "completions/mean_length": 945.5547485351562,
      "completions/mean_terminated_length": 558.6578979492188,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 5.932944606413994,
      "grad_norm": 0.24806180596351624,
      "learning_rate": 1e-06,
      "loss": -0.0533,
      "num_tokens": 363477195.0,
      "reward": 0.6328125,
      "reward_std": 0.1837027221918106,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 635
    },
    {
      "clip_ratio/high_max": 0.0017773505969671533,
      "clip_ratio/high_mean": 0.0005512566249308293,
      "clip_ratio/low_mean": 0.0004607334658430773,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010119900944118854,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3295.0,
      "completions/mean_length": 843.6998291015625,
      "completions/mean_terminated_length": 572.3470458984375,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 5.942274052478134,
      "grad_norm": 0.19955603778362274,
      "learning_rate": 1e-06,
      "loss": -0.0303,
      "num_tokens": 364037646.0,
      "reward": 0.6328125,
      "reward_std": 0.13444572687149048,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 636
    },
    {
      "clip_ratio/high_max": 0.002003154790145345,
      "clip_ratio/high_mean": 0.0007183063389675226,
      "clip_ratio/low_mean": 0.0006264768635446671,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013447832097881474,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2371.0,
      "completions/mean_length": 830.1897583007812,
      "completions/mean_terminated_length": 536.1873168945312,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 5.9516034985422746,
      "grad_norm": 0.19695554673671722,
      "learning_rate": 1e-06,
      "loss": -0.0261,
      "num_tokens": 364574120.0,
      "reward": 0.6015625,
      "reward_std": 0.1510944366455078,
      "rewards/verify_math_reward/mean": 0.6015625,
      "rewards/verify_math_reward/std": 0.48984986543655396,
      "step": 637
    },
    {
      "clip_ratio/high_max": 0.002064436566797667,
      "clip_ratio/high_mean": 0.0007059309355099685,
      "clip_ratio/low_mean": 0.0004430554208738613,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011489863572933245,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3456.0,
      "completions/mean_length": 943.9933471679688,
      "completions/mean_terminated_length": 609.3358154296875,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 5.960932944606414,
      "grad_norm": 0.22147022187709808,
      "learning_rate": 1e-06,
      "loss": -0.027,
      "num_tokens": 365163418.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.15405866503715515,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 638
    },
    {
      "clip_ratio/high_max": 0.0015090778506419156,
      "clip_ratio/high_mean": 0.0004944426536894753,
      "clip_ratio/low_mean": 0.0003357936457177857,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008302363039547345,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2797.0,
      "completions/mean_length": 789.9285888671875,
      "completions/mean_terminated_length": 544.1535034179688,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 5.970262390670554,
      "grad_norm": 0.1750473976135254,
      "learning_rate": 1e-06,
      "loss": -0.0119,
      "num_tokens": 365708458.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.10941943526268005,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.48291724920272827,
      "step": 639
    },
    {
      "clip_ratio/high_max": 0.0016332373525074217,
      "clip_ratio/high_mean": 0.0005789284296042752,
      "clip_ratio/low_mean": 0.0004030867044093611,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009820151553867618,
      "completions/clipped_ratio": 0.052455357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3506.0,
      "completions/mean_length": 705.8516235351562,
      "completions/mean_terminated_length": 518.175537109375,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 5.979591836734694,
      "grad_norm": 0.4237379729747772,
      "learning_rate": 1e-06,
      "loss": -0.0185,
      "num_tokens": 366228629.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.13406775891780853,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 640
    },
    {
      "clip_ratio/high_max": 0.001970581157365814,
      "clip_ratio/high_mean": 0.0007922044205770362,
      "clip_ratio/low_mean": 0.0006420257413992658,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014342301474243868,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3675.0,
      "completions/mean_length": 951.9207763671875,
      "completions/mean_terminated_length": 592.1505126953125,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 5.988921282798834,
      "grad_norm": 0.2584664225578308,
      "learning_rate": 1e-06,
      "loss": -0.0507,
      "num_tokens": 366804686.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.17942126095294952,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.49702703952789307,
      "step": 641
    },
    {
      "clip_ratio/high_max": 0.0016068845870904624,
      "clip_ratio/high_mean": 0.0004907044321953435,
      "clip_ratio/low_mean": 0.00035171221770724514,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008424166571785463,
      "completions/clipped_ratio": 0.08238636363636365,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2848.0,
      "completions/mean_length": 860.1875,
      "completions/mean_terminated_length": 569.6655883789062,
      "completions/min_length": 180.0,
      "completions/min_terminated_length": 180.0,
      "epoch": 5.998250728862974,
      "grad_norm": 0.21397212147712708,
      "learning_rate": 1e-06,
      "loss": -0.0036,
      "num_tokens": 367330884.0,
      "reward": 0.637276828289032,
      "reward_std": 0.11404110491275787,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 642
    },
    {
      "clip_ratio/high_max": 0.002405567061941838,
      "clip_ratio/high_mean": 0.0008714211853657616,
      "clip_ratio/low_mean": 0.0004944297033944167,
      "clip_ratio/low_min": 1.0056315659312531e-05,
      "clip_ratio/region_mean": 0.0013658508541993797,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3641.0,
      "completions/mean_length": 829.6172485351562,
      "completions/mean_terminated_length": 578.3569946289062,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 6.0093294460641395,
      "grad_norm": 0.23643708229064941,
      "learning_rate": 1e-06,
      "loss": -0.0302,
      "num_tokens": 367917021.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.15782159566879272,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 643
    },
    {
      "clip_ratio/high_max": 0.001965926945558749,
      "clip_ratio/high_mean": 0.0008047975861700252,
      "clip_ratio/low_mean": 0.0005486659656526172,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013534635691030417,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3214.0,
      "completions/mean_length": 764.6763916015625,
      "completions/mean_terminated_length": 508.4206848144531,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 6.01865889212828,
      "grad_norm": 0.26358529925346375,
      "learning_rate": 1e-06,
      "loss": -0.0405,
      "num_tokens": 368417643.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.18727271258831024,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 644
    },
    {
      "clip_ratio/high_max": 0.0019731192442122847,
      "clip_ratio/high_mean": 0.0008096116944216192,
      "clip_ratio/low_mean": 0.0005940697265032213,
      "clip_ratio/low_min": 1.8242848454974592e-05,
      "clip_ratio/region_mean": 0.0014036814282007981,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2381.0,
      "completions/mean_length": 927.7779541015625,
      "completions/mean_terminated_length": 552.021240234375,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 6.0279883381924195,
      "grad_norm": 0.3170129060745239,
      "learning_rate": 1e-06,
      "loss": -0.0426,
      "num_tokens": 368951340.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.18066614866256714,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 645
    },
    {
      "clip_ratio/high_max": 0.0015176476808846928,
      "clip_ratio/high_mean": 0.0005830992067785701,
      "clip_ratio/low_mean": 0.0005638477869069902,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011469469936855603,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3910.0,
      "completions/mean_length": 894.935302734375,
      "completions/mean_terminated_length": 576.7926635742188,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 6.03731778425656,
      "grad_norm": 0.23401102423667908,
      "learning_rate": 1e-06,
      "loss": -0.0183,
      "num_tokens": 369522258.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.151692196726799,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 646
    },
    {
      "clip_ratio/high_max": 0.0018379630200797692,
      "clip_ratio/high_mean": 0.0007818344874976901,
      "clip_ratio/low_mean": 0.0005430814962892327,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013249159819679335,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3348.0,
      "completions/mean_length": 876.3672485351562,
      "completions/mean_terminated_length": 620.3482055664062,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 6.0466472303206995,
      "grad_norm": 0.24321548640727997,
      "learning_rate": 1e-06,
      "loss": -0.0147,
      "num_tokens": 370121483.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.1859932243824005,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 647
    },
    {
      "clip_ratio/high_max": 0.0015598051140841562,
      "clip_ratio/high_mean": 0.000518383514645393,
      "clip_ratio/low_mean": 0.0004901263337160344,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010085098510899115,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3214.0,
      "completions/mean_length": 778.114990234375,
      "completions/mean_terminated_length": 535.7305297851562,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 6.05597667638484,
      "grad_norm": 0.16377153992652893,
      "learning_rate": 1e-06,
      "loss": -0.0194,
      "num_tokens": 370656354.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.1388038545846939,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422141790390015,
      "step": 648
    },
    {
      "clip_ratio/high_max": 0.0016828532570798416,
      "clip_ratio/high_mean": 0.0006458414413827995,
      "clip_ratio/low_mean": 0.000329727351982001,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009755687806318747,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3460.0,
      "completions/mean_length": 813.5960083007812,
      "completions/mean_terminated_length": 586.4129028320312,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 6.0653061224489795,
      "grad_norm": 0.19361768662929535,
      "learning_rate": 1e-06,
      "loss": -0.0242,
      "num_tokens": 371235488.0,
      "reward": 0.6640625,
      "reward_std": 0.1301603466272354,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 649
    },
    {
      "clip_ratio/high_max": 0.00226844877761323,
      "clip_ratio/high_mean": 0.0007824697695468785,
      "clip_ratio/low_mean": 0.00036311576377556776,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011455855274107307,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3923.0,
      "completions/mean_length": 761.9185791015625,
      "completions/mean_terminated_length": 522.63037109375,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 6.07463556851312,
      "grad_norm": 0.20134110748767853,
      "learning_rate": 1e-06,
      "loss": -0.0371,
      "num_tokens": 371763743.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.11986782401800156,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 650
    },
    {
      "clip_ratio/high_max": 0.0017510913785372395,
      "clip_ratio/high_mean": 0.0005578294021688635,
      "clip_ratio/low_mean": 0.0004051493924634997,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009629787855374161,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3783.0,
      "completions/mean_length": 696.9955444335938,
      "completions/mean_terminated_length": 525.650634765625,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 6.0839650145772595,
      "grad_norm": 0.24923075735569,
      "learning_rate": 1e-06,
      "loss": -0.0213,
      "num_tokens": 372303115.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.12223109602928162,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 651
    },
    {
      "clip_ratio/high_max": 0.0016269245134026278,
      "clip_ratio/high_mean": 0.0004937498461003997,
      "clip_ratio/low_mean": 0.0005023160556447692,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009960658990166849,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1837.0,
      "completions/mean_length": 764.6484985351562,
      "completions/mean_terminated_length": 516.9940185546875,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 6.093294460641399,
      "grad_norm": 0.2499609887599945,
      "learning_rate": 1e-06,
      "loss": -0.0101,
      "num_tokens": 372822464.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.11637409776449203,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 652
    },
    {
      "clip_ratio/high_max": 0.0017889736518554855,
      "clip_ratio/high_mean": 0.0006591872588614933,
      "clip_ratio/low_mean": 0.0006058605977159459,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012650478711293545,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3414.0,
      "completions/mean_length": 814.4408569335938,
      "completions/mean_terminated_length": 544.9407958984375,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 6.1026239067055394,
      "grad_norm": 0.2120325118303299,
      "learning_rate": 1e-06,
      "loss": -0.0142,
      "num_tokens": 373364467.0,
      "reward": 0.637276828289032,
      "reward_std": 0.14995622634887695,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 653
    },
    {
      "clip_ratio/high_max": 0.0019145760343235452,
      "clip_ratio/high_mean": 0.000730699280211411,
      "clip_ratio/low_mean": 0.0006068510438126395,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013375503294810187,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3881.0,
      "completions/mean_length": 852.5647583007812,
      "completions/mean_terminated_length": 577.6973876953125,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 6.111953352769679,
      "grad_norm": 0.406982958316803,
      "learning_rate": 1e-06,
      "loss": -0.0342,
      "num_tokens": 373938861.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.1619105339050293,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 654
    },
    {
      "clip_ratio/high_max": 0.0015829457479412667,
      "clip_ratio/high_mean": 0.0005346698862922494,
      "clip_ratio/low_mean": 0.0004069591695952113,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009416290286026197,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3615.0,
      "completions/mean_length": 816.2623291015625,
      "completions/mean_terminated_length": 516.6516723632812,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 6.121282798833819,
      "grad_norm": 0.18141846358776093,
      "learning_rate": 1e-06,
      "loss": -0.0302,
      "num_tokens": 374445176.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.12407511472702026,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 655
    },
    {
      "clip_ratio/high_max": 0.001749757408106234,
      "clip_ratio/high_mean": 0.0006996469383011572,
      "clip_ratio/low_mean": 0.0004230048552926746,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00112265181451221,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3431.0,
      "completions/mean_length": 747.4006958007812,
      "completions/mean_terminated_length": 528.4078369140625,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 6.130612244897959,
      "grad_norm": 0.21173200011253357,
      "learning_rate": 1e-06,
      "loss": -0.0286,
      "num_tokens": 374966799.0,
      "reward": 0.723214328289032,
      "reward_std": 0.14624707400798798,
      "rewards/verify_math_reward/mean": 0.7232142686843872,
      "rewards/verify_math_reward/std": 0.44765952229499817,
      "step": 656
    },
    {
      "clip_ratio/high_max": 0.0023784947334206663,
      "clip_ratio/high_mean": 0.0008999665551527869,
      "clip_ratio/low_mean": 0.00035073289313913847,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012506994462455623,
      "completions/clipped_ratio": 0.0502232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3135.0,
      "completions/mean_length": 748.3638916015625,
      "completions/mean_terminated_length": 571.3442993164062,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 6.139941690962099,
      "grad_norm": 0.24232593178749084,
      "learning_rate": 1e-06,
      "loss": -0.02,
      "num_tokens": 375538013.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.17186996340751648,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179922461509705,
      "step": 657
    },
    {
      "clip_ratio/high_max": 0.0017698585870675743,
      "clip_ratio/high_mean": 0.0006790151292079827,
      "clip_ratio/low_mean": 0.0005332792488843552,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012122944062866736,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3974.0,
      "completions/mean_length": 810.5022583007812,
      "completions/mean_terminated_length": 536.3796997070312,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 6.149271137026239,
      "grad_norm": 0.23561832308769226,
      "learning_rate": 1e-06,
      "loss": -0.0409,
      "num_tokens": 376075407.0,
      "reward": 0.629464328289032,
      "reward_std": 0.15665017068386078,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 658
    },
    {
      "clip_ratio/high_max": 0.002033540342381457,
      "clip_ratio/high_mean": 0.0007511484600399854,
      "clip_ratio/low_mean": 0.00048375802634836873,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012349064963927958,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2416.0,
      "completions/mean_length": 765.6217041015625,
      "completions/mean_terminated_length": 526.5992431640625,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 6.158600583090379,
      "grad_norm": 0.22931915521621704,
      "learning_rate": 1e-06,
      "loss": -0.0253,
      "num_tokens": 376600716.0,
      "reward": 0.7087053656578064,
      "reward_std": 0.1317012757062912,
      "rewards/verify_math_reward/mean": 0.7087053656578064,
      "rewards/verify_math_reward/std": 0.45461276173591614,
      "step": 659
    },
    {
      "clip_ratio/high_max": 0.001794919964595465,
      "clip_ratio/high_mean": 0.0006537741446663858,
      "clip_ratio/low_mean": 0.0005107600077280949,
      "clip_ratio/low_min": 1.1952572094742209e-05,
      "clip_ratio/region_mean": 0.0011645341619441751,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2942.0,
      "completions/mean_length": 808.6707763671875,
      "completions/mean_terminated_length": 542.9879150390625,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 6.167930029154519,
      "grad_norm": 0.37244313955307007,
      "learning_rate": 1e-06,
      "loss": -0.0245,
      "num_tokens": 377137397.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.15613023936748505,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 660
    },
    {
      "clip_ratio/high_max": 0.0016825433958729263,
      "clip_ratio/high_mean": 0.0006719929642713396,
      "clip_ratio/low_mean": 0.0004389477489894489,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001110940702346852,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3923.0,
      "completions/mean_length": 909.794677734375,
      "completions/mean_terminated_length": 627.1786499023438,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 6.1772594752186585,
      "grad_norm": 0.38394808769226074,
      "learning_rate": 1e-06,
      "loss": -0.019,
      "num_tokens": 377755845.0,
      "reward": 0.5546875,
      "reward_std": 0.16856057941913605,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 661
    },
    {
      "clip_ratio/high_max": 0.001949212615727447,
      "clip_ratio/high_mean": 0.0008322285739268409,
      "clip_ratio/low_mean": 0.0004455939683793986,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012778225900547113,
      "completions/clipped_ratio": 0.0513392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2791.0,
      "completions/mean_length": 749.2879638671875,
      "completions/mean_terminated_length": 568.1717529296875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 6.186588921282799,
      "grad_norm": 0.22008521854877472,
      "learning_rate": 1e-06,
      "loss": -0.0302,
      "num_tokens": 378321719.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.15958118438720703,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 662
    },
    {
      "clip_ratio/high_max": 0.0018381416484771762,
      "clip_ratio/high_mean": 0.0007279891924554249,
      "clip_ratio/low_mean": 0.0005336279491530149,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012616171334229875,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3719.0,
      "completions/mean_length": 797.904052734375,
      "completions/mean_terminated_length": 527.0458984375,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 6.1959183673469385,
      "grad_norm": 0.22327379882335663,
      "learning_rate": 1e-06,
      "loss": -0.0435,
      "num_tokens": 378851529.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.14507634937763214,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422141790390015,
      "step": 663
    },
    {
      "clip_ratio/high_max": 0.0014841456613794435,
      "clip_ratio/high_mean": 0.0005774707378805033,
      "clip_ratio/low_mean": 0.00037224599600449437,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009497167211520718,
      "completions/clipped_ratio": 0.0535714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3343.0,
      "completions/mean_length": 739.6585083007812,
      "completions/mean_terminated_length": 549.6768798828125,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 6.205247813411079,
      "grad_norm": 0.19939115643501282,
      "learning_rate": 1e-06,
      "loss": -0.0013,
      "num_tokens": 379398783.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.11881405115127563,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 664
    },
    {
      "clip_ratio/high_max": 0.00168674882115738,
      "clip_ratio/high_mean": 0.000540895513495343,
      "clip_ratio/low_mean": 0.00047623256841689,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010171280755457701,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2777.0,
      "completions/mean_length": 837.6451416015625,
      "completions/mean_terminated_length": 599.609619140625,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 6.214577259475218,
      "grad_norm": 0.16969957947731018,
      "learning_rate": 1e-06,
      "loss": -0.0165,
      "num_tokens": 379998337.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.12490066885948181,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 665
    },
    {
      "clip_ratio/high_max": 0.0017531042649352457,
      "clip_ratio/high_mean": 0.0006934321909284336,
      "clip_ratio/low_mean": 0.0002739244739586866,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009673566437413683,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2126.0,
      "completions/mean_length": 744.1016235351562,
      "completions/mean_terminated_length": 507.8267517089844,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 6.223906705539359,
      "grad_norm": 0.24664264917373657,
      "learning_rate": 1e-06,
      "loss": -0.0166,
      "num_tokens": 380501764.0,
      "reward": 0.699776828289032,
      "reward_std": 0.1265924572944641,
      "rewards/verify_math_reward/mean": 0.6997767686843872,
      "rewards/verify_math_reward/std": 0.4586109220981598,
      "step": 666
    },
    {
      "clip_ratio/high_max": 0.0015495972147618886,
      "clip_ratio/high_mean": 0.0006333369365165709,
      "clip_ratio/low_mean": 0.00040289902472068206,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010362359498685692,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3186.0,
      "completions/mean_length": 862.904052734375,
      "completions/mean_terminated_length": 593.153564453125,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 6.233236151603498,
      "grad_norm": 0.28692084550857544,
      "learning_rate": 1e-06,
      "loss": -0.0408,
      "num_tokens": 381074422.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.15195390582084656,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 667
    },
    {
      "clip_ratio/high_max": 0.0021219025147729553,
      "clip_ratio/high_mean": 0.0007889531007094774,
      "clip_ratio/low_mean": 0.0006719961311318912,
      "clip_ratio/low_min": 2.1777002984890714e-05,
      "clip_ratio/region_mean": 0.001460949260945199,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3754.0,
      "completions/mean_length": 876.638427734375,
      "completions/mean_terminated_length": 561.0147094726562,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 6.242565597667639,
      "grad_norm": 0.2616720497608185,
      "learning_rate": 1e-06,
      "loss": -0.0279,
      "num_tokens": 381627994.0,
      "reward": 0.6171875,
      "reward_std": 0.1624757945537567,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 668
    },
    {
      "clip_ratio/high_max": 0.0017941140267794253,
      "clip_ratio/high_mean": 0.0005700217761841486,
      "clip_ratio/low_mean": 0.0004883920100837713,
      "clip_ratio/low_min": 2.200704147981014e-05,
      "clip_ratio/region_mean": 0.0010584137926343828,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3982.0,
      "completions/mean_length": 797.9699096679688,
      "completions/mean_terminated_length": 565.4921875,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 6.251895043731778,
      "grad_norm": 0.177029550075531,
      "learning_rate": 1e-06,
      "loss": -0.0206,
      "num_tokens": 382192663.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.12944427132606506,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 669
    },
    {
      "clip_ratio/high_max": 0.0021771472274849657,
      "clip_ratio/high_mean": 0.0008874079157976666,
      "clip_ratio/low_mean": 0.00047173102757369634,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013591390052170027,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2376.0,
      "completions/mean_length": 791.4330444335938,
      "completions/mean_terminated_length": 511.385009765625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 6.261224489795918,
      "grad_norm": 0.23426395654678345,
      "learning_rate": 1e-06,
      "loss": -0.048,
      "num_tokens": 382705299.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.13879750669002533,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 670
    },
    {
      "clip_ratio/high_max": 0.0020564780843415065,
      "clip_ratio/high_mean": 0.0008107266194201657,
      "clip_ratio/low_mean": 0.0006503005106424098,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014610271318815649,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3896.0,
      "completions/mean_length": 821.0424194335938,
      "completions/mean_terminated_length": 534.8810424804688,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 6.270553935860058,
      "grad_norm": 0.2185281217098236,
      "learning_rate": 1e-06,
      "loss": -0.0403,
      "num_tokens": 383235081.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.15372419357299805,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 671
    },
    {
      "clip_ratio/high_max": 0.0018346465112699661,
      "clip_ratio/high_mean": 0.0006521518334920984,
      "clip_ratio/low_mean": 0.0004316251993259357,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010837770096259192,
      "completions/clipped_ratio": 0.0546875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2778.0,
      "completions/mean_length": 763.9051513671875,
      "completions/mean_terminated_length": 571.1392822265625,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 6.279883381924198,
      "grad_norm": 0.23892897367477417,
      "learning_rate": 1e-06,
      "loss": -0.0213,
      "num_tokens": 383813140.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.14620429277420044,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 672
    },
    {
      "clip_ratio/high_max": 0.0018636267450347077,
      "clip_ratio/high_mean": 0.0006842506536486326,
      "clip_ratio/low_mean": 0.0004896163745797821,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011738670073100366,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3406.0,
      "completions/mean_length": 768.0904541015625,
      "completions/mean_terminated_length": 537.7577514648438,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 6.289212827988338,
      "grad_norm": 0.1821121871471405,
      "learning_rate": 1e-06,
      "loss": -0.0246,
      "num_tokens": 384363509.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.13853536546230316,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.48468026518821716,
      "step": 673
    },
    {
      "clip_ratio/high_max": 0.0016410047792305704,
      "clip_ratio/high_mean": 0.0007759028194413986,
      "clip_ratio/low_mean": 0.0006135401199571788,
      "clip_ratio/low_min": 1.314405926677864e-05,
      "clip_ratio/region_mean": 0.0013894428993808106,
      "completions/clipped_ratio": 0.0580357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3348.0,
      "completions/mean_length": 765.5000610351562,
      "completions/mean_terminated_length": 560.3033447265625,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 6.298542274052478,
      "grad_norm": 0.22997897863388062,
      "learning_rate": 1e-06,
      "loss": -0.0275,
      "num_tokens": 384923245.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.1810377687215805,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 674
    },
    {
      "clip_ratio/high_max": 0.0017424870384274982,
      "clip_ratio/high_mean": 0.0005803296608064556,
      "clip_ratio/low_mean": 0.00042845564257731894,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010087852824653964,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3366.0,
      "completions/mean_length": 846.7076416015625,
      "completions/mean_terminated_length": 554.1921997070312,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 6.307871720116618,
      "grad_norm": 0.1782207190990448,
      "learning_rate": 1e-06,
      "loss": -0.0547,
      "num_tokens": 385454535.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.1400102972984314,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 675
    },
    {
      "clip_ratio/high_max": 0.0019060583326790947,
      "clip_ratio/high_mean": 0.0007721769043200766,
      "clip_ratio/low_mean": 0.0004835599893340259,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012557368900161237,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4075.0,
      "completions/mean_length": 880.3482666015625,
      "completions/mean_terminated_length": 578.02197265625,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 6.317201166180758,
      "grad_norm": 0.19786468148231506,
      "learning_rate": 1e-06,
      "loss": -0.0409,
      "num_tokens": 386015039.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.16311588883399963,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 676
    },
    {
      "clip_ratio/high_max": 0.0018042715710180346,
      "clip_ratio/high_mean": 0.0006700275789626176,
      "clip_ratio/low_mean": 0.00042331810800533276,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010933456505881622,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2396.0,
      "completions/mean_length": 837.7689819335938,
      "completions/mean_terminated_length": 527.0819091796875,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 6.326530612244898,
      "grad_norm": 0.23652194440364838,
      "learning_rate": 1e-06,
      "loss": -0.0206,
      "num_tokens": 386537552.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.14015227556228638,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 677
    },
    {
      "clip_ratio/high_max": 0.0014771329642826458,
      "clip_ratio/high_mean": 0.0004302015258872416,
      "clip_ratio/low_mean": 0.000347447462445416,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007776490001560887,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3341.0,
      "completions/mean_length": 781.2232666015625,
      "completions/mean_terminated_length": 539.0658569335938,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 6.335860058309038,
      "grad_norm": 0.17091988027095795,
      "learning_rate": 1e-06,
      "loss": -0.0169,
      "num_tokens": 387090720.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.10926970839500427,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 678
    },
    {
      "clip_ratio/high_max": 0.0018406154558761045,
      "clip_ratio/high_mean": 0.0007871619691286469,
      "clip_ratio/low_mean": 0.0005521555058294325,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001339317470410606,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3343.0,
      "completions/mean_length": 776.7266235351562,
      "completions/mean_terminated_length": 538.5011596679688,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 6.345189504373177,
      "grad_norm": 0.24148808419704437,
      "learning_rate": 1e-06,
      "loss": -0.0425,
      "num_tokens": 387629403.0,
      "reward": 0.7042410969734192,
      "reward_std": 0.16075189411640167,
      "rewards/verify_math_reward/mean": 0.7042410969734192,
      "rewards/verify_math_reward/std": 0.45663803815841675,
      "step": 679
    },
    {
      "clip_ratio/high_max": 0.0016842037912283558,
      "clip_ratio/high_mean": 0.0006951858504180564,
      "clip_ratio/low_mean": 0.00044807020867665415,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001143256053182995,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3784.0,
      "completions/mean_length": 843.3795166015625,
      "completions/mean_terminated_length": 593.1779174804688,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 6.354518950437318,
      "grad_norm": 0.31907927989959717,
      "learning_rate": 1e-06,
      "loss": -0.0432,
      "num_tokens": 388208791.0,
      "reward": 0.6640625,
      "reward_std": 0.15901413559913635,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 680
    },
    {
      "clip_ratio/high_max": 0.0017580458552401979,
      "clip_ratio/high_mean": 0.0006089514367886295,
      "clip_ratio/low_mean": 0.0005764675233876915,
      "clip_ratio/low_min": 3.156007096549729e-05,
      "clip_ratio/region_mean": 0.0011854189324367326,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2567.0,
      "completions/mean_length": 930.5803833007812,
      "completions/mean_terminated_length": 577.12158203125,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 6.363848396501457,
      "grad_norm": 0.22167593240737915,
      "learning_rate": 1e-06,
      "loss": -0.0355,
      "num_tokens": 388764751.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.15255165100097656,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 681
    },
    {
      "clip_ratio/high_max": 0.0015328846602642443,
      "clip_ratio/high_mean": 0.000523390966009174,
      "clip_ratio/low_mean": 0.0005523648501366551,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010757558084151242,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3860.0,
      "completions/mean_length": 861.0000610351562,
      "completions/mean_terminated_length": 561.1707153320312,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 6.373177842565598,
      "grad_norm": 0.20637856423854828,
      "learning_rate": 1e-06,
      "loss": -0.0227,
      "num_tokens": 389320255.0,
      "reward": 0.621651828289032,
      "reward_std": 0.1396312266588211,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.485245943069458,
      "step": 682
    },
    {
      "clip_ratio/high_max": 0.0023740889264445286,
      "clip_ratio/high_mean": 0.0007973391748237191,
      "clip_ratio/low_mean": 0.0005626817628581193,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001360020920401439,
      "completions/clipped_ratio": 0.0535714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2416.0,
      "completions/mean_length": 721.8158569335938,
      "completions/mean_terminated_length": 530.8242797851562,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 6.382507288629737,
      "grad_norm": 0.23185193538665771,
      "learning_rate": 1e-06,
      "loss": -0.0247,
      "num_tokens": 389862994.0,
      "reward": 0.723214328289032,
      "reward_std": 0.1592869609594345,
      "rewards/verify_math_reward/mean": 0.7232142686843872,
      "rewards/verify_math_reward/std": 0.44765952229499817,
      "step": 683
    },
    {
      "clip_ratio/high_max": 0.0017013444885378703,
      "clip_ratio/high_mean": 0.0006676474968116963,
      "clip_ratio/low_mean": 0.0006344834073388483,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001302130905969534,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3117.0,
      "completions/mean_length": 852.130615234375,
      "completions/mean_terminated_length": 560.1033935546875,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 6.391836734693878,
      "grad_norm": 0.2287149280309677,
      "learning_rate": 1e-06,
      "loss": -0.0491,
      "num_tokens": 390413719.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.17532162368297577,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161845445632935,
      "step": 684
    },
    {
      "clip_ratio/high_max": 0.0020252322574378923,
      "clip_ratio/high_mean": 0.000795369150409897,
      "clip_ratio/low_mean": 0.00041700781093823025,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012123769738536794,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3954.0,
      "completions/mean_length": 748.2957763671875,
      "completions/mean_terminated_length": 516.5930786132812,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 6.401166180758017,
      "grad_norm": 0.27178215980529785,
      "learning_rate": 1e-06,
      "loss": -0.0175,
      "num_tokens": 390938272.0,
      "reward": 0.7299107313156128,
      "reward_std": 0.15105310082435608,
      "rewards/verify_math_reward/mean": 0.7299107313156128,
      "rewards/verify_math_reward/std": 0.44425368309020996,
      "step": 685
    },
    {
      "clip_ratio/high_max": 0.0016065499585238285,
      "clip_ratio/high_mean": 0.0005255454384496261,
      "clip_ratio/low_mean": 0.0005874646922165994,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011130101411254145,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2741.0,
      "completions/mean_length": 877.1719360351562,
      "completions/mean_terminated_length": 517.7493896484375,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 6.410495626822158,
      "grad_norm": 0.18121960759162903,
      "learning_rate": 1e-06,
      "loss": -0.0323,
      "num_tokens": 391443082.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.12388080358505249,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 686
    },
    {
      "clip_ratio/high_max": 0.0022832265167380683,
      "clip_ratio/high_mean": 0.0007447617481375346,
      "clip_ratio/low_mean": 0.000574076830616832,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013188385964895133,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4062.0,
      "completions/mean_length": 843.0324096679688,
      "completions/mean_terminated_length": 571.6239624023438,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 6.419825072886297,
      "grad_norm": 7.517394065856934,
      "learning_rate": 1e-06,
      "loss": 0.0018,
      "num_tokens": 392000247.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.164948508143425,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 687
    },
    {
      "clip_ratio/high_max": 0.0017661859492363874,
      "clip_ratio/high_mean": 0.000780365189712029,
      "clip_ratio/low_mean": 0.00047947918574209325,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012598443645401858,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2984.0,
      "completions/mean_length": 910.2042846679688,
      "completions/mean_terminated_length": 571.9592895507812,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 6.429154518950437,
      "grad_norm": 0.2341417372226715,
      "learning_rate": 1e-06,
      "loss": -0.0502,
      "num_tokens": 392551086.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.1670539826154709,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 688
    },
    {
      "clip_ratio/high_max": 0.001945983760379022,
      "clip_ratio/high_mean": 0.0006622571581829106,
      "clip_ratio/low_mean": 0.0005116532493047998,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011739104083972052,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3404.0,
      "completions/mean_length": 873.4085083007812,
      "completions/mean_terminated_length": 570.4298095703125,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 6.438483965014577,
      "grad_norm": 0.2397717386484146,
      "learning_rate": 1e-06,
      "loss": -0.0167,
      "num_tokens": 393109132.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.1514211744070053,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 689
    },
    {
      "clip_ratio/high_max": 0.0021740635274909437,
      "clip_ratio/high_mean": 0.0009290729904023465,
      "clip_ratio/low_mean": 0.0006227267604117515,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0015517997162532993,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3743.0,
      "completions/mean_length": 842.5881958007812,
      "completions/mean_terminated_length": 571.1427001953125,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 6.447813411078717,
      "grad_norm": 0.25231674313545227,
      "learning_rate": 1e-06,
      "loss": -0.0516,
      "num_tokens": 393666299.0,
      "reward": 0.660714328289032,
      "reward_std": 0.1946806162595749,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 690
    },
    {
      "clip_ratio/high_max": 0.0023352392454398796,
      "clip_ratio/high_mean": 0.0008996025208034553,
      "clip_ratio/low_mean": 0.000517100941578974,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014167034496495035,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3534.0,
      "completions/mean_length": 905.6328735351562,
      "completions/mean_terminated_length": 575.5947875976562,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 6.457142857142857,
      "grad_norm": 0.21373829245567322,
      "learning_rate": 1e-06,
      "loss": -0.0411,
      "num_tokens": 394222154.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.184951514005661,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 691
    },
    {
      "clip_ratio/high_max": 0.0014546386701113079,
      "clip_ratio/high_mean": 0.0005665116441377904,
      "clip_ratio/low_mean": 0.00039359544462058693,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009601070814824197,
      "completions/clipped_ratio": 0.0435267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3667.0,
      "completions/mean_length": 709.2332763671875,
      "completions/mean_terminated_length": 555.1096801757812,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 6.466472303206997,
      "grad_norm": 0.2313837856054306,
      "learning_rate": 1e-06,
      "loss": -0.0087,
      "num_tokens": 394788171.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.1511615812778473,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140389680862427,
      "step": 692
    },
    {
      "clip_ratio/high_max": 0.001757234273100039,
      "clip_ratio/high_mean": 0.0007398223715426866,
      "clip_ratio/low_mean": 0.00039782238400221104,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011376447582733817,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2660.0,
      "completions/mean_length": 845.5938110351562,
      "completions/mean_terminated_length": 531.2949829101562,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 6.475801749271137,
      "grad_norm": 0.21056608855724335,
      "learning_rate": 1e-06,
      "loss": -0.0507,
      "num_tokens": 395304631.0,
      "reward": 0.652901828289032,
      "reward_std": 0.15300630033016205,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631320357322693,
      "step": 693
    },
    {
      "clip_ratio/high_max": 0.0017511395672045182,
      "clip_ratio/high_mean": 0.0006473053281297325,
      "clip_ratio/low_mean": 0.00040133827769750496,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010486436185601633,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3572.0,
      "completions/mean_length": 753.8002319335938,
      "completions/mean_terminated_length": 509.6395263671875,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 6.485131195335277,
      "grad_norm": 0.2462763786315918,
      "learning_rate": 1e-06,
      "loss": -0.0178,
      "num_tokens": 395821748.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.12783098220825195,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 694
    },
    {
      "clip_ratio/high_max": 0.0021231266655377112,
      "clip_ratio/high_mean": 0.0008790855936240405,
      "clip_ratio/low_mean": 0.00041230998681385245,
      "clip_ratio/low_min": 2.891510484914761e-05,
      "clip_ratio/region_mean": 0.0012913956124975812,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3830.0,
      "completions/mean_length": 828.591552734375,
      "completions/mean_terminated_length": 530.1072387695312,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 6.494460641399417,
      "grad_norm": 0.4874882400035858,
      "learning_rate": 1e-06,
      "loss": -0.0521,
      "num_tokens": 396344326.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.16101041436195374,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 695
    },
    {
      "clip_ratio/high_max": 0.0015126053513085935,
      "clip_ratio/high_mean": 0.0004811641765627428,
      "clip_ratio/low_mean": 0.0004651214394471026,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009462856378377182,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3821.0,
      "completions/mean_length": 837.0045166015625,
      "completions/mean_terminated_length": 577.8554077148438,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 6.503790087463557,
      "grad_norm": 0.2033204585313797,
      "learning_rate": 1e-06,
      "loss": -0.0074,
      "num_tokens": 396917538.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.13650770485401154,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 696
    },
    {
      "clip_ratio/high_max": 0.0021814837818965316,
      "clip_ratio/high_mean": 0.0007460377855750266,
      "clip_ratio/low_mean": 0.00048013355899456656,
      "clip_ratio/low_min": 1.0751763511507306e-05,
      "clip_ratio/region_mean": 0.0012261713527550455,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3853.0,
      "completions/mean_length": 950.0234985351562,
      "completions/mean_terminated_length": 576.9050903320312,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 6.513119533527696,
      "grad_norm": 0.2154729664325714,
      "learning_rate": 1e-06,
      "loss": -0.0574,
      "num_tokens": 397476311.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.15751849114894867,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 697
    },
    {
      "clip_ratio/high_max": 0.002022316286456771,
      "clip_ratio/high_mean": 0.0006916968668519985,
      "clip_ratio/low_mean": 0.00048438541489304043,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011760822853830177,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2652.0,
      "completions/mean_length": 862.8370971679688,
      "completions/mean_terminated_length": 550.2056274414062,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 6.522448979591837,
      "grad_norm": 0.23432853817939758,
      "learning_rate": 1e-06,
      "loss": -0.0229,
      "num_tokens": 398025581.0,
      "reward": 0.6328125,
      "reward_std": 0.15285545587539673,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 698
    },
    {
      "clip_ratio/high_max": 0.00194493422168307,
      "clip_ratio/high_mean": 0.0007136685198929626,
      "clip_ratio/low_mean": 0.0004591452625390957,
      "clip_ratio/low_min": 2.2986392650636844e-05,
      "clip_ratio/region_mean": 0.001172813746961765,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4066.0,
      "completions/mean_length": 825.2131958007812,
      "completions/mean_terminated_length": 508.9436950683594,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 6.531778425655976,
      "grad_norm": 0.21726079285144806,
      "learning_rate": 1e-06,
      "loss": -0.038,
      "num_tokens": 398531116.0,
      "reward": 0.7098214626312256,
      "reward_std": 0.138991117477417,
      "rewards/verify_math_reward/mean": 0.7098214030265808,
      "rewards/verify_math_reward/std": 0.454098105430603,
      "step": 699
    },
    {
      "clip_ratio/high_max": 0.0018781147518893704,
      "clip_ratio/high_mean": 0.0007367989092017524,
      "clip_ratio/low_mean": 0.00044935276400792645,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011861516613862477,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2622.0,
      "completions/mean_length": 766.6295166015625,
      "completions/mean_terminated_length": 519.122314453125,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 6.541107871720117,
      "grad_norm": 0.22654405236244202,
      "learning_rate": 1e-06,
      "loss": -0.0295,
      "num_tokens": 399044248.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.1536468118429184,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 700
    },
    {
      "clip_ratio/high_max": 0.0016621490285615437,
      "clip_ratio/high_mean": 0.0005753648920290289,
      "clip_ratio/low_mean": 0.0003774890003569453,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009528538903396111,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3531.0,
      "completions/mean_length": 872.8516235351562,
      "completions/mean_terminated_length": 582.6897583007812,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 6.550437317784256,
      "grad_norm": 0.19929608702659607,
      "learning_rate": 1e-06,
      "loss": -0.0212,
      "num_tokens": 399608355.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.13324108719825745,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975659370422363,
      "step": 701
    },
    {
      "clip_ratio/high_max": 0.0019489681944833137,
      "clip_ratio/high_mean": 0.0007136997555790003,
      "clip_ratio/low_mean": 0.00040497460850019706,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011186743431608193,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3820.0,
      "completions/mean_length": 777.1819458007812,
      "completions/mean_terminated_length": 534.7293701171875,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 6.559766763848397,
      "grad_norm": 0.20507614314556122,
      "learning_rate": 1e-06,
      "loss": -0.0297,
      "num_tokens": 400139158.0,
      "reward": 0.6484375,
      "reward_std": 0.12020343542098999,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 702
    },
    {
      "clip_ratio/high_max": 0.0018310301566089038,
      "clip_ratio/high_mean": 0.0006095708095017471,
      "clip_ratio/low_mean": 0.0005028325331295491,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011124033262603916,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2436.0,
      "completions/mean_length": 984.6763916015625,
      "completions/mean_terminated_length": 584.98486328125,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 6.569096209912536,
      "grad_norm": 0.801872730255127,
      "learning_rate": 1e-06,
      "loss": -0.052,
      "num_tokens": 400679980.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.146018847823143,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 703
    },
    {
      "clip_ratio/high_max": 0.002049558133876417,
      "clip_ratio/high_mean": 0.0007897551877249498,
      "clip_ratio/low_mean": 0.00045285856003829394,
      "clip_ratio/low_min": 1.3199577551858965e-05,
      "clip_ratio/region_mean": 0.0012426137327565812,
      "completions/clipped_ratio": 0.0535714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3632.0,
      "completions/mean_length": 737.536865234375,
      "completions/mean_terminated_length": 547.4351196289062,
      "completions/min_length": 181.0,
      "completions/min_terminated_length": 181.0,
      "epoch": 6.578425655976677,
      "grad_norm": 0.22027653455734253,
      "learning_rate": 1e-06,
      "loss": -0.0413,
      "num_tokens": 401229757.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.17551273107528687,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 704
    },
    {
      "clip_ratio/high_max": 0.002380739701038692,
      "clip_ratio/high_mean": 0.0009879878580250079,
      "clip_ratio/low_mean": 0.0004997300620743772,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014877178909955546,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2423.0,
      "completions/mean_length": 838.864990234375,
      "completions/mean_terminated_length": 592.5269775390625,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 6.587755102040816,
      "grad_norm": 0.22483842074871063,
      "learning_rate": 1e-06,
      "loss": -0.0247,
      "num_tokens": 401804492.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.18280190229415894,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219157218933105,
      "step": 705
    },
    {
      "clip_ratio/high_max": 0.0015944882870826405,
      "clip_ratio/high_mean": 0.0006383999811987451,
      "clip_ratio/low_mean": 0.00037323112701415084,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010116311332240002,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2502.0,
      "completions/mean_length": 858.6283569335938,
      "completions/mean_terminated_length": 571.473876953125,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 6.597084548104956,
      "grad_norm": 0.2551387548446655,
      "learning_rate": 1e-06,
      "loss": -0.0244,
      "num_tokens": 402367871.0,
      "reward": 0.6328125,
      "reward_std": 0.13982413709163666,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 706
    },
    {
      "clip_ratio/high_max": 0.0014203772880136967,
      "clip_ratio/high_mean": 0.0004773060836669174,
      "clip_ratio/low_mean": 0.00036724474284710595,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000844550824695034,
      "completions/clipped_ratio": 0.0591517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3039.0,
      "completions/mean_length": 748.7767944335938,
      "completions/mean_terminated_length": 538.3345336914062,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 6.606413994169096,
      "grad_norm": 0.2324991375207901,
      "learning_rate": 1e-06,
      "loss": -0.0215,
      "num_tokens": 402913079.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.12756815552711487,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975659370422363,
      "step": 707
    },
    {
      "clip_ratio/high_max": 0.0019358624704182148,
      "clip_ratio/high_mean": 0.000905928885913454,
      "clip_ratio/low_mean": 0.00046523944092768943,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013711683350265957,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3393.0,
      "completions/mean_length": 777.9832763671875,
      "completions/mean_terminated_length": 531.3201293945312,
      "completions/min_length": 90.0,
      "completions/min_terminated_length": 90.0,
      "epoch": 6.615743440233236,
      "grad_norm": 0.29323431849479675,
      "learning_rate": 1e-06,
      "loss": -0.0507,
      "num_tokens": 403446328.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.1590908318758011,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 708
    },
    {
      "clip_ratio/high_max": 0.0020780169870704412,
      "clip_ratio/high_mean": 0.0006810736704210285,
      "clip_ratio/low_mean": 0.00048153315719901,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011626068117038812,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3561.0,
      "completions/mean_length": 816.2455444335938,
      "completions/mean_terminated_length": 542.6021728515625,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 6.625072886297376,
      "grad_norm": 0.22786401212215424,
      "learning_rate": 1e-06,
      "loss": -0.0366,
      "num_tokens": 403984356.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.15537026524543762,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 709
    },
    {
      "clip_ratio/high_max": 0.0019382916543690953,
      "clip_ratio/high_mean": 0.0007331270335271256,
      "clip_ratio/low_mean": 0.0002815239149640547,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010146509393962333,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3715.0,
      "completions/mean_length": 852.4096069335938,
      "completions/mean_terminated_length": 547.4566650390625,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 6.634402332361516,
      "grad_norm": 45.36116027832031,
      "learning_rate": 1e-06,
      "loss": 0.0086,
      "num_tokens": 404532515.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.14391450583934784,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 710
    },
    {
      "clip_ratio/high_max": 0.0013017620076425374,
      "clip_ratio/high_mean": 0.0004013598336314317,
      "clip_ratio/low_mean": 0.00034816852894437034,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000749528353480855,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3106.0,
      "completions/mean_length": 813.5201416015625,
      "completions/mean_terminated_length": 565.2652587890625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 6.643731778425656,
      "grad_norm": 0.19472123682498932,
      "learning_rate": 1e-06,
      "loss": -0.0259,
      "num_tokens": 405087237.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.11445339024066925,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 711
    },
    {
      "clip_ratio/high_max": 0.0016469336114823818,
      "clip_ratio/high_mean": 0.0005770206389570376,
      "clip_ratio/low_mean": 0.0005242761690169573,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001101296813430963,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3273.0,
      "completions/mean_length": 921.318115234375,
      "completions/mean_terminated_length": 540.3562622070312,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 6.653061224489796,
      "grad_norm": 1109248256.0,
      "learning_rate": 1e-06,
      "loss": 30502.4707,
      "num_tokens": 405615890.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.1540568619966507,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 712
    },
    {
      "clip_ratio/high_max": 0.0017509955614514183,
      "clip_ratio/high_mean": 0.0005239955335127888,
      "clip_ratio/low_mean": 0.0002240728153992677,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007480683543690247,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3967.0,
      "completions/mean_length": 770.5814819335938,
      "completions/mean_terminated_length": 548.8869018554688,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 6.662390670553936,
      "grad_norm": 0.17097845673561096,
      "learning_rate": 1e-06,
      "loss": -0.0336,
      "num_tokens": 406167971.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.09818372875452042,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 713
    },
    {
      "clip_ratio/high_max": 0.0017584624647497549,
      "clip_ratio/high_mean": 0.0005277417051274824,
      "clip_ratio/low_mean": 0.0004908358516786393,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010185775645368267,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4041.0,
      "completions/mean_length": 852.0379638671875,
      "completions/mean_terminated_length": 577.1259155273438,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 6.671720116618076,
      "grad_norm": 0.16864407062530518,
      "learning_rate": 1e-06,
      "loss": -0.0298,
      "num_tokens": 406743789.0,
      "reward": 0.613839328289032,
      "reward_std": 0.11629742383956909,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 714
    },
    {
      "clip_ratio/high_max": 0.0020065759199496824,
      "clip_ratio/high_mean": 0.0006850403196949628,
      "clip_ratio/low_mean": 0.00035257973286206834,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010376200516475365,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3452.0,
      "completions/mean_length": 905.8605346679688,
      "completions/mean_terminated_length": 527.5043334960938,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 6.681049562682215,
      "grad_norm": 0.2451452910900116,
      "learning_rate": 1e-06,
      "loss": -0.0423,
      "num_tokens": 407257056.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.1242266595363617,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 715
    },
    {
      "clip_ratio/high_max": 0.0021253882805467583,
      "clip_ratio/high_mean": 0.0008916977321860031,
      "clip_ratio/low_mean": 0.0005053585273344652,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001397056257701479,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3484.0,
      "completions/mean_length": 865.935302734375,
      "completions/mean_terminated_length": 566.5634155273438,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 6.690379008746356,
      "grad_norm": 0.21814365684986115,
      "learning_rate": 1e-06,
      "loss": -0.0215,
      "num_tokens": 407812390.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.17130474746227264,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.49075525999069214,
      "step": 716
    },
    {
      "clip_ratio/high_max": 0.0018165794099331833,
      "clip_ratio/high_mean": 0.0007312041234399658,
      "clip_ratio/low_mean": 0.0004184676008662791,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011496717233967502,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3870.0,
      "completions/mean_length": 937.302490234375,
      "completions/mean_terminated_length": 593.285888671875,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 6.699708454810495,
      "grad_norm": 0.204402357339859,
      "learning_rate": 1e-06,
      "loss": -0.0472,
      "num_tokens": 408380773.0,
      "reward": 0.621651828289032,
      "reward_std": 0.1486877053976059,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 717
    },
    {
      "clip_ratio/high_max": 0.0015623804483766435,
      "clip_ratio/high_mean": 0.0006160191819617467,
      "clip_ratio/low_mean": 0.0004172031394773512,
      "clip_ratio/low_min": 1.2077294741175137e-05,
      "clip_ratio/region_mean": 0.0010332223173463717,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3620.0,
      "completions/mean_length": 801.1986694335938,
      "completions/mean_terminated_length": 552.011962890625,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 6.709037900874636,
      "grad_norm": 0.2643035352230072,
      "learning_rate": 1e-06,
      "loss": -0.0184,
      "num_tokens": 408924919.0,
      "reward": 0.6796875596046448,
      "reward_std": 0.13771051168441772,
      "rewards/verify_math_reward/mean": 0.6796875,
      "rewards/verify_math_reward/std": 0.4668572247028351,
      "step": 718
    },
    {
      "clip_ratio/high_max": 0.0015866108260524925,
      "clip_ratio/high_mean": 0.0006346721202135086,
      "clip_ratio/low_mean": 0.00043240988088655286,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010670819756342098,
      "completions/clipped_ratio": 0.0636160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2123.0,
      "completions/mean_length": 771.7522583007812,
      "completions/mean_terminated_length": 545.909423828125,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 6.718367346938775,
      "grad_norm": 1.0090049505233765,
      "learning_rate": 1e-06,
      "loss": -0.0179,
      "num_tokens": 409481817.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.1456729769706726,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 719
    },
    {
      "clip_ratio/high_max": 0.0019794810286839493,
      "clip_ratio/high_mean": 0.0007128198585633072,
      "clip_ratio/low_mean": 0.0005413898770711967,
      "clip_ratio/low_min": 1.2902560229122173e-05,
      "clip_ratio/region_mean": 0.0012542097392724827,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3631.0,
      "completions/mean_length": 856.7020263671875,
      "completions/mean_terminated_length": 556.474365234375,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 6.727696793002916,
      "grad_norm": 0.2689896523952484,
      "learning_rate": 1e-06,
      "loss": -0.0201,
      "num_tokens": 410027046.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.15315786004066467,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 720
    },
    {
      "clip_ratio/high_max": 0.0017890060844365507,
      "clip_ratio/high_mean": 0.0006061323074391112,
      "clip_ratio/low_mean": 0.0003104347533735563,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009165670671791304,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3616.0,
      "completions/mean_length": 804.7042846679688,
      "completions/mean_terminated_length": 551.5276489257812,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 6.737026239067055,
      "grad_norm": 0.20530693233013153,
      "learning_rate": 1e-06,
      "loss": -0.0207,
      "num_tokens": 410566837.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.12767691910266876,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 721
    },
    {
      "clip_ratio/high_max": 0.0017850015283329412,
      "clip_ratio/high_mean": 0.0006699838495478616,
      "clip_ratio/low_mean": 0.00028910670334880706,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009590905501681846,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3791.0,
      "completions/mean_length": 813.3080444335938,
      "completions/mean_terminated_length": 526.4708862304688,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 6.746355685131196,
      "grad_norm": 0.2605624794960022,
      "learning_rate": 1e-06,
      "loss": -0.0302,
      "num_tokens": 411103425.0,
      "reward": 0.637276828289032,
      "reward_std": 0.12760023772716522,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 722
    },
    {
      "clip_ratio/high_max": 0.0013954752721474506,
      "clip_ratio/high_mean": 0.00039860400420366204,
      "clip_ratio/low_mean": 0.00026434793812768476,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006629519339185208,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2090.0,
      "completions/mean_length": 752.2533569335938,
      "completions/mean_terminated_length": 520.8245849609375,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 6.755685131195335,
      "grad_norm": 0.9102131128311157,
      "learning_rate": 1e-06,
      "loss": -0.0267,
      "num_tokens": 411627812.0,
      "reward": 0.625,
      "reward_std": 0.09119697660207748,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 723
    },
    {
      "clip_ratio/high_max": 0.0020355298402137123,
      "clip_ratio/high_mean": 0.0007807658621459268,
      "clip_ratio/low_mean": 0.0005408401111708372,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013216059505793964,
      "completions/clipped_ratio": 0.0580357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1668.0,
      "completions/mean_length": 734.8392944335938,
      "completions/mean_terminated_length": 527.7536010742188,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 6.765014577259475,
      "grad_norm": 0.2034630924463272,
      "learning_rate": 1e-06,
      "loss": -0.0549,
      "num_tokens": 412154268.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.14902472496032715,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.4628615975379944,
      "step": 724
    },
    {
      "clip_ratio/high_max": 0.001617327634448884,
      "clip_ratio/high_mean": 0.0005615910349661135,
      "clip_ratio/low_mean": 0.0002966302085951611,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008582212394685484,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3917.0,
      "completions/mean_length": 881.1027221679688,
      "completions/mean_terminated_length": 574.5477294921875,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 6.774344023323615,
      "grad_norm": 0.2027897983789444,
      "learning_rate": 1e-06,
      "loss": -0.0287,
      "num_tokens": 412709896.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.13113674521446228,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.4876568913459778,
      "step": 725
    },
    {
      "clip_ratio/high_max": 0.002235222033050377,
      "clip_ratio/high_mean": 0.000967257887168671,
      "clip_ratio/low_mean": 0.0004345703191575012,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014018282199685927,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2156.0,
      "completions/mean_length": 845.0111694335938,
      "completions/mean_terminated_length": 539.3626708984375,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 6.783673469387755,
      "grad_norm": 1.8573672771453857,
      "learning_rate": 1e-06,
      "loss": -0.0735,
      "num_tokens": 413239082.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.17574027180671692,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 726
    },
    {
      "clip_ratio/high_max": 0.0015628930868842872,
      "clip_ratio/high_mean": 0.0005348108979887911,
      "clip_ratio/low_mean": 0.00035897696534448187,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008937878665165044,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2173.0,
      "completions/mean_length": 875.8471069335938,
      "completions/mean_terminated_length": 547.098388671875,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 6.793002915451895,
      "grad_norm": 0.19924987852573395,
      "learning_rate": 1e-06,
      "loss": -0.0646,
      "num_tokens": 413774273.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.13835102319717407,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.49075525999069214,
      "step": 727
    },
    {
      "clip_ratio/high_max": 0.0018707213748712093,
      "clip_ratio/high_mean": 0.0007416300595650682,
      "clip_ratio/low_mean": 0.00043128803736181,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011729180878319312,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3907.0,
      "completions/mean_length": 906.9732666015625,
      "completions/mean_terminated_length": 572.73486328125,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 6.802332361516035,
      "grad_norm": 0.21239390969276428,
      "learning_rate": 1e-06,
      "loss": -0.065,
      "num_tokens": 414322769.0,
      "reward": 0.5859375,
      "reward_std": 0.1555984914302826,
      "rewards/verify_math_reward/mean": 0.5859375,
      "rewards/verify_math_reward/std": 0.4928344786167145,
      "step": 728
    },
    {
      "clip_ratio/high_max": 0.0017851747797976714,
      "clip_ratio/high_mean": 0.0007487396505894139,
      "clip_ratio/low_mean": 0.0004084533720742911,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011571930153877474,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2880.0,
      "completions/mean_length": 847.0301513671875,
      "completions/mean_terminated_length": 567.4218139648438,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 6.811661807580175,
      "grad_norm": 0.40516436100006104,
      "learning_rate": 1e-06,
      "loss": -0.0535,
      "num_tokens": 414872268.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.16096945106983185,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 729
    },
    {
      "clip_ratio/high_max": 0.0018265884900756646,
      "clip_ratio/high_mean": 0.0005265837353363167,
      "clip_ratio/low_mean": 0.0005540081328945234,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010805918573169038,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3536.0,
      "completions/mean_length": 879.1038208007812,
      "completions/mean_terminated_length": 550.6875610351562,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 6.820991253644315,
      "grad_norm": 0.25719979405403137,
      "learning_rate": 1e-06,
      "loss": -0.0195,
      "num_tokens": 415396513.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.13752618432044983,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 730
    },
    {
      "clip_ratio/high_max": 0.001776021550540463,
      "clip_ratio/high_mean": 0.0006765694761270424,
      "clip_ratio/low_mean": 0.0003450459280429641,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010216153896180913,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2321.0,
      "completions/mean_length": 939.0480346679688,
      "completions/mean_terminated_length": 546.9046630859375,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 6.830320699708455,
      "grad_norm": 0.25183525681495667,
      "learning_rate": 1e-06,
      "loss": -0.0372,
      "num_tokens": 415921196.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.14158260822296143,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 731
    },
    {
      "clip_ratio/high_max": 0.001664623723627301,
      "clip_ratio/high_mean": 0.0005984655344946077,
      "clip_ratio/low_mean": 0.0004878929175902158,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010863584830076434,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4065.0,
      "completions/mean_length": 886.1920166015625,
      "completions/mean_terminated_length": 584.4151611328125,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 6.839650145772595,
      "grad_norm": 0.22967371344566345,
      "learning_rate": 1e-06,
      "loss": -0.035,
      "num_tokens": 416490416.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.1456383466720581,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 732
    },
    {
      "clip_ratio/high_max": 0.002098257900797762,
      "clip_ratio/high_mean": 0.0007389553775283275,
      "clip_ratio/low_mean": 0.00045374384535534773,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011926992301596329,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3985.0,
      "completions/mean_length": 851.9185791015625,
      "completions/mean_terminated_length": 546.91943359375,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 6.848979591836734,
      "grad_norm": 0.2931455075740814,
      "learning_rate": 1e-06,
      "loss": -0.0326,
      "num_tokens": 417039431.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.1689721643924713,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 733
    },
    {
      "clip_ratio/high_max": 0.00213176980469143,
      "clip_ratio/high_mean": 0.0006978256369620794,
      "clip_ratio/low_mean": 0.00043826582168549066,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011360914322722238,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4002.0,
      "completions/mean_length": 897.9397583007812,
      "completions/mean_terminated_length": 536.4198608398438,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 6.858309037900875,
      "grad_norm": 0.21264854073524475,
      "learning_rate": 1e-06,
      "loss": -0.0384,
      "num_tokens": 417559961.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.143612802028656,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644899368286,
      "step": 734
    },
    {
      "clip_ratio/high_max": 0.0015545234564342536,
      "clip_ratio/high_mean": 0.0006458678044509725,
      "clip_ratio/low_mean": 0.0004670431944759912,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001112910973461112,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2442.0,
      "completions/mean_length": 928.6719360351562,
      "completions/mean_terminated_length": 539.7017211914062,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 6.867638483965014,
      "grad_norm": 0.2152799367904663,
      "learning_rate": 1e-06,
      "loss": -0.0297,
      "num_tokens": 418090427.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.13027937710285187,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791128396987915,
      "step": 735
    },
    {
      "clip_ratio/high_max": 0.0021052377014711965,
      "clip_ratio/high_mean": 0.0008278625191451283,
      "clip_ratio/low_mean": 0.0005098735819046851,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013377360883168876,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3988.0,
      "completions/mean_length": 805.4766235351562,
      "completions/mean_terminated_length": 543.8204956054688,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 6.876967930029155,
      "grad_norm": 0.2666328549385071,
      "learning_rate": 1e-06,
      "loss": -0.0377,
      "num_tokens": 418641934.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.15349483489990234,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 736
    },
    {
      "clip_ratio/high_max": 0.0012388359282340389,
      "clip_ratio/high_mean": 0.00039999733917284175,
      "clip_ratio/low_mean": 0.00034508345288486453,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007450808043358847,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3093.0,
      "completions/mean_length": 754.3158569335938,
      "completions/mean_terminated_length": 523.0298461914062,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 6.886297376093294,
      "grad_norm": 0.19998709857463837,
      "learning_rate": 1e-06,
      "loss": -0.0192,
      "num_tokens": 419179249.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.11419376730918884,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 737
    },
    {
      "clip_ratio/high_max": 0.0018614743385114707,
      "clip_ratio/high_mean": 0.0006670547882094979,
      "clip_ratio/low_mean": 0.0005492903319463949,
      "clip_ratio/low_min": 1.2512512512330431e-05,
      "clip_ratio/region_mean": 0.0012163451392552815,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2785.0,
      "completions/mean_length": 967.966552734375,
      "completions/mean_terminated_length": 583.822021484375,
      "completions/min_length": 184.0,
      "completions/min_terminated_length": 184.0,
      "epoch": 6.895626822157435,
      "grad_norm": 0.19895361363887787,
      "learning_rate": 1e-06,
      "loss": -0.0415,
      "num_tokens": 419739211.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.1481582224369049,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 738
    },
    {
      "clip_ratio/high_max": 0.0014793432565056719,
      "clip_ratio/high_mean": 0.0005714016688216361,
      "clip_ratio/low_mean": 0.0005917988237342797,
      "clip_ratio/low_min": 3.0882758437655866e-05,
      "clip_ratio/region_mean": 0.0011632004916464211,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3461.0,
      "completions/mean_length": 876.9297485351562,
      "completions/mean_terminated_length": 517.4801635742188,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 6.904956268221574,
      "grad_norm": 0.2275323122739792,
      "learning_rate": 1e-06,
      "loss": -0.0231,
      "num_tokens": 420237484.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.1413978785276413,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 739
    },
    {
      "clip_ratio/high_max": 0.001665983862039866,
      "clip_ratio/high_mean": 0.00047589814585080603,
      "clip_ratio/low_mean": 0.0003746619590856426,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008505601072101854,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2560.0,
      "completions/mean_length": 907.9219360351562,
      "completions/mean_terminated_length": 547.5303955078125,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 6.914285714285715,
      "grad_norm": 0.2475191056728363,
      "learning_rate": 1e-06,
      "loss": -0.0283,
      "num_tokens": 420772646.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.12031038105487823,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 740
    },
    {
      "clip_ratio/high_max": 0.0019404951526666991,
      "clip_ratio/high_mean": 0.0007807975161995273,
      "clip_ratio/low_mean": 0.00048363557652919553,
      "clip_ratio/low_min": 3.6296260077506304e-05,
      "clip_ratio/region_mean": 0.0012644331000046805,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3166.0,
      "completions/mean_length": 792.4933471679688,
      "completions/mean_terminated_length": 516.8682250976562,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 6.923615160349854,
      "grad_norm": 0.286824107170105,
      "learning_rate": 1e-06,
      "loss": -0.0509,
      "num_tokens": 421280584.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.17228437960147858,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179922461509705,
      "step": 741
    },
    {
      "clip_ratio/high_max": 0.0018482838677300606,
      "clip_ratio/high_mean": 0.0005934440705459565,
      "clip_ratio/low_mean": 0.0006454563026636606,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001238900385942543,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3417.0,
      "completions/mean_length": 798.521240234375,
      "completions/mean_terminated_length": 536.3120727539062,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 6.932944606413994,
      "grad_norm": 0.23824188113212585,
      "learning_rate": 1e-06,
      "loss": -0.0232,
      "num_tokens": 421810227.0,
      "reward": 0.652901828289032,
      "reward_std": 0.15338465571403503,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631317377090454,
      "step": 742
    },
    {
      "clip_ratio/high_max": 0.001606914273907023,
      "clip_ratio/high_mean": 0.0005033282220665569,
      "clip_ratio/low_mean": 0.0003377502998773707,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008410785121668596,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3995.0,
      "completions/mean_length": 758.1663208007812,
      "completions/mean_terminated_length": 510.0299987792969,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 6.942274052478134,
      "grad_norm": 0.5451865792274475,
      "learning_rate": 1e-06,
      "loss": -0.023,
      "num_tokens": 422330560.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.11675135046243668,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 743
    },
    {
      "clip_ratio/high_max": 0.001633227540878579,
      "clip_ratio/high_mean": 0.0006699478708469542,
      "clip_ratio/low_mean": 0.0005339775061656837,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012039253997500055,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4063.0,
      "completions/mean_length": 917.8404541015625,
      "completions/mean_terminated_length": 567.3370361328125,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 6.9516034985422746,
      "grad_norm": 0.3352295160293579,
      "learning_rate": 1e-06,
      "loss": -0.0321,
      "num_tokens": 422874705.0,
      "reward": 0.566964328289032,
      "reward_std": 0.1612718552350998,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 744
    },
    {
      "clip_ratio/high_max": 0.00205148749955697,
      "clip_ratio/high_mean": 0.000676715019835683,
      "clip_ratio/low_mean": 0.000427590067829442,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001104305089029367,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2751.0,
      "completions/mean_length": 851.4029541015625,
      "completions/mean_terminated_length": 550.6841430664062,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 6.960932944606414,
      "grad_norm": 0.22099699079990387,
      "learning_rate": 1e-06,
      "loss": -0.0384,
      "num_tokens": 423416826.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.1401529759168625,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 745
    },
    {
      "clip_ratio/high_max": 0.0021962550017633475,
      "clip_ratio/high_mean": 0.0007704723448114237,
      "clip_ratio/low_mean": 0.00033543988092787913,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011059122334700078,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3455.0,
      "completions/mean_length": 812.8326416015625,
      "completions/mean_terminated_length": 538.9044799804688,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 6.970262390670554,
      "grad_norm": 0.21213407814502716,
      "learning_rate": 1e-06,
      "loss": -0.0406,
      "num_tokens": 423953420.0,
      "reward": 0.6796875596046448,
      "reward_std": 0.14248555898666382,
      "rewards/verify_math_reward/mean": 0.6796875,
      "rewards/verify_math_reward/std": 0.4668572247028351,
      "step": 746
    },
    {
      "clip_ratio/high_max": 0.001903683692944469,
      "clip_ratio/high_mean": 0.000747757512726821,
      "clip_ratio/low_mean": 0.0005013756072003162,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001249133114470169,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4029.0,
      "completions/mean_length": 823.5089721679688,
      "completions/mean_terminated_length": 533.2393798828125,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 6.979591836734694,
      "grad_norm": 0.2869330942630768,
      "learning_rate": 1e-06,
      "loss": -0.0155,
      "num_tokens": 424478788.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.15575045347213745,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975656390190125,
      "step": 747
    },
    {
      "clip_ratio/high_max": 0.0017768329671525862,
      "clip_ratio/high_mean": 0.0005946480705460999,
      "clip_ratio/low_mean": 0.0003190028014614654,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009136508633673657,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3612.0,
      "completions/mean_length": 877.8114013671875,
      "completions/mean_terminated_length": 566.6279296875,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 6.988921282798834,
      "grad_norm": 0.2015768140554428,
      "learning_rate": 1e-06,
      "loss": -0.0324,
      "num_tokens": 425022099.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.11419377475976944,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 748
    },
    {
      "clip_ratio/high_max": 0.0020705315000668634,
      "clip_ratio/high_mean": 0.0007473792957171099,
      "clip_ratio/low_mean": 0.0005184333222132409,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001265812614292372,
      "completions/clipped_ratio": 0.08806818181818177,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2477.0,
      "completions/mean_length": 816.102294921875,
      "completions/mean_terminated_length": 499.3520202636719,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 6.998250728862974,
      "grad_norm": 0.21387995779514313,
      "learning_rate": 1e-06,
      "loss": -0.0716,
      "num_tokens": 425536239.0,
      "reward": 0.574776828289032,
      "reward_std": 0.15417632460594177,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 749
    },
    {
      "clip_ratio/high_max": 0.00155554620505427,
      "clip_ratio/high_mean": 0.0005096999029774452,
      "clip_ratio/low_mean": 0.0005069395642749441,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010166394840780413,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2647.0,
      "completions/mean_length": 826.6506958007812,
      "completions/mean_terminated_length": 523.6378173828125,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 7.0093294460641395,
      "grad_norm": 0.2210661917924881,
      "learning_rate": 1e-06,
      "loss": -0.0454,
      "num_tokens": 426061510.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.14815637469291687,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 750
    },
    {
      "clip_ratio/high_max": 0.0018054342217510566,
      "clip_ratio/high_mean": 0.0006222102547326358,
      "clip_ratio/low_mean": 0.00036785796783078695,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009900682234729175,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3571.0,
      "completions/mean_length": 822.5391235351562,
      "completions/mean_terminated_length": 532.1834716796875,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 7.01865889212828,
      "grad_norm": 0.21002645790576935,
      "learning_rate": 1e-06,
      "loss": -0.0197,
      "num_tokens": 426590681.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.1256142258644104,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 751
    },
    {
      "clip_ratio/high_max": 0.0018424143236188684,
      "clip_ratio/high_mean": 0.000759250679948309,
      "clip_ratio/low_mean": 0.00038130590655782726,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001140556614700472,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3871.0,
      "completions/mean_length": 920.7332763671875,
      "completions/mean_terminated_length": 548.56982421875,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 7.0279883381924195,
      "grad_norm": 0.22753813862800598,
      "learning_rate": 1e-06,
      "loss": -0.0525,
      "num_tokens": 427114946.0,
      "reward": 0.652901828289032,
      "reward_std": 0.14669284224510193,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631317377090454,
      "step": 752
    },
    {
      "clip_ratio/high_max": 0.0020993458601878956,
      "clip_ratio/high_mean": 0.0006957223295103176,
      "clip_ratio/low_mean": 0.0003861168702314899,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010818392147484701,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4013.0,
      "completions/mean_length": 905.4498291015625,
      "completions/mean_terminated_length": 571.0517578125,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 7.03731778425656,
      "grad_norm": 0.23804092407226562,
      "learning_rate": 1e-06,
      "loss": -0.0381,
      "num_tokens": 427663725.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.13549810647964478,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 753
    },
    {
      "clip_ratio/high_max": 0.0021927592824795283,
      "clip_ratio/high_mean": 0.0008181643352145329,
      "clip_ratio/low_mean": 0.0005232930584497808,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013414574059424922,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4064.0,
      "completions/mean_length": 884.075927734375,
      "completions/mean_terminated_length": 551.807861328125,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 7.0466472303206995,
      "grad_norm": 0.4311201572418213,
      "learning_rate": 1e-06,
      "loss": -0.023,
      "num_tokens": 428198697.0,
      "reward": 0.640625,
      "reward_std": 0.15567448735237122,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 754
    },
    {
      "clip_ratio/high_max": 0.001530787460069405,
      "clip_ratio/high_mean": 0.0005595842558250297,
      "clip_ratio/low_mean": 0.00039646024833928095,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009560445068927947,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3343.0,
      "completions/mean_length": 801.9207763671875,
      "completions/mean_terminated_length": 557.0371704101562,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 7.05597667638484,
      "grad_norm": 0.20450225472450256,
      "learning_rate": 1e-06,
      "loss": -0.0262,
      "num_tokens": 428763946.0,
      "reward": 0.645089328289032,
      "reward_std": 0.1300102025270462,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 755
    },
    {
      "clip_ratio/high_max": 0.0017400143115082756,
      "clip_ratio/high_mean": 0.0006631210417253897,
      "clip_ratio/low_mean": 0.0003824581331173249,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001045579178025946,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3717.0,
      "completions/mean_length": 849.6484985351562,
      "completions/mean_terminated_length": 565.9866333007812,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 7.0653061224489795,
      "grad_norm": 0.21997500956058502,
      "learning_rate": 1e-06,
      "loss": -0.0276,
      "num_tokens": 429319391.0,
      "reward": 0.660714328289032,
      "reward_std": 0.13842590153217316,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313389778137,
      "step": 756
    },
    {
      "clip_ratio/high_max": 0.0018967443211295176,
      "clip_ratio/high_mean": 0.000704463614965789,
      "clip_ratio/low_mean": 0.0002965810149362369,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010010446276282892,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3225.0,
      "completions/mean_length": 846.357177734375,
      "completions/mean_terminated_length": 505.7657165527344,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 7.07463556851312,
      "grad_norm": 1.8910417556762695,
      "learning_rate": 1e-06,
      "loss": -0.0454,
      "num_tokens": 429816807.0,
      "reward": 0.6640625,
      "reward_std": 0.1328292191028595,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 757
    },
    {
      "clip_ratio/high_max": 0.001921982580824988,
      "clip_ratio/high_mean": 0.0006678210411337204,
      "clip_ratio/low_mean": 0.0003846672611871327,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010524883018661058,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3981.0,
      "completions/mean_length": 838.3225708007812,
      "completions/mean_terminated_length": 527.6882934570312,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 7.0839650145772595,
      "grad_norm": 0.2775750160217285,
      "learning_rate": 1e-06,
      "loss": -0.023,
      "num_tokens": 430336448.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.1339571624994278,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 758
    },
    {
      "clip_ratio/high_max": 0.001697431940556271,
      "clip_ratio/high_mean": 0.0005855698491359362,
      "clip_ratio/low_mean": 0.0004389806065319135,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010245504636259284,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3409.0,
      "completions/mean_length": 816.7388916015625,
      "completions/mean_terminated_length": 525.8687744140625,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 7.093294460641399,
      "grad_norm": 0.25227269530296326,
      "learning_rate": 1e-06,
      "loss": -0.041,
      "num_tokens": 430861438.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.15138980746269226,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 759
    },
    {
      "clip_ratio/high_max": 0.001748860981024336,
      "clip_ratio/high_mean": 0.0007294713250303175,
      "clip_ratio/low_mean": 0.0003953832233491994,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011248545670241583,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4091.0,
      "completions/mean_length": 818.3225708007812,
      "completions/mean_terminated_length": 570.4309692382812,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 7.1026239067055394,
      "grad_norm": 0.20775267481803894,
      "learning_rate": 1e-06,
      "loss": -0.0319,
      "num_tokens": 431428527.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.1403031200170517,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219160199165344,
      "step": 760
    },
    {
      "clip_ratio/high_max": 0.0017372585971315857,
      "clip_ratio/high_mean": 0.0005428749236671138,
      "clip_ratio/low_mean": 0.00031455166845262283,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008574265866627684,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4086.0,
      "completions/mean_length": 858.169677734375,
      "completions/mean_terminated_length": 536.3729858398438,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 7.111953352769679,
      "grad_norm": 0.18922269344329834,
      "learning_rate": 1e-06,
      "loss": -0.0222,
      "num_tokens": 431948399.0,
      "reward": 0.6595982313156128,
      "reward_std": 0.10941943526268005,
      "rewards/verify_math_reward/mean": 0.6595982313156128,
      "rewards/verify_math_reward/std": 0.4741089344024658,
      "step": 761
    },
    {
      "clip_ratio/high_max": 0.0013112820088281296,
      "clip_ratio/high_mean": 0.0005209065923281742,
      "clip_ratio/low_mean": 0.00040278148708239314,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009236880778189516,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3489.0,
      "completions/mean_length": 967.7410888671875,
      "completions/mean_terminated_length": 583.5689086914062,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 7.121282798833819,
      "grad_norm": 0.18740127980709076,
      "learning_rate": 1e-06,
      "loss": -0.0377,
      "num_tokens": 432504431.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.11501862108707428,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 762
    },
    {
      "clip_ratio/high_max": 0.0021683051927539054,
      "clip_ratio/high_mean": 0.0006840880923846271,
      "clip_ratio/low_mean": 0.00042543773088254966,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011095258250861662,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2823.0,
      "completions/mean_length": 865.8281860351562,
      "completions/mean_terminated_length": 557.816650390625,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 7.130612244897959,
      "grad_norm": 0.2093333750963211,
      "learning_rate": 1e-06,
      "loss": -0.055,
      "num_tokens": 433055477.0,
      "reward": 0.6484375,
      "reward_std": 0.14053022861480713,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 763
    },
    {
      "clip_ratio/high_max": 0.0018374361170572229,
      "clip_ratio/high_mean": 0.0006714117116644047,
      "clip_ratio/low_mean": 0.0004735927745969093,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011450044767116196,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2586.0,
      "completions/mean_length": 768.7935791015625,
      "completions/mean_terminated_length": 546.9797973632812,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 7.139941690962099,
      "grad_norm": 0.2233939915895462,
      "learning_rate": 1e-06,
      "loss": -0.031,
      "num_tokens": 433595140.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.13519318401813507,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 764
    },
    {
      "clip_ratio/high_max": 0.0017288717081100913,
      "clip_ratio/high_mean": 0.0006363531338138273,
      "clip_ratio/low_mean": 0.0005515497514352319,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001187902864330681,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3743.0,
      "completions/mean_length": 944.1250610351562,
      "completions/mean_terminated_length": 574.7032470703125,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 7.149271137026239,
      "grad_norm": 0.2202746421098709,
      "learning_rate": 1e-06,
      "loss": -0.0478,
      "num_tokens": 434155524.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.1420365571975708,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.48468026518821716,
      "step": 765
    },
    {
      "clip_ratio/high_max": 0.0018139493549824692,
      "clip_ratio/high_mean": 0.0006565253888766165,
      "clip_ratio/low_mean": 0.0001985701794637862,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008550955626560608,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2983.0,
      "completions/mean_length": 957.060302734375,
      "completions/mean_terminated_length": 504.0587463378906,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 7.158600583090379,
      "grad_norm": 0.2239382266998291,
      "learning_rate": 1e-06,
      "loss": -0.0501,
      "num_tokens": 434633154.0,
      "reward": 0.676339328289032,
      "reward_std": 0.10690322518348694,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335687637329,
      "step": 766
    },
    {
      "clip_ratio/high_max": 0.001899087154015433,
      "clip_ratio/high_mean": 0.000682533589497325,
      "clip_ratio/low_mean": 0.0005771335800091038,
      "clip_ratio/low_min": 1.577884358994197e-05,
      "clip_ratio/region_mean": 0.0012596671549545135,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3301.0,
      "completions/mean_length": 927.075927734375,
      "completions/mean_terminated_length": 555.6558837890625,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 7.167930029154519,
      "grad_norm": 0.26125797629356384,
      "learning_rate": 1e-06,
      "loss": -0.0315,
      "num_tokens": 435172174.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.15390713512897491,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 767
    },
    {
      "clip_ratio/high_max": 0.0013509400996554177,
      "clip_ratio/high_mean": 0.0005508841413757182,
      "clip_ratio/low_mean": 0.0004875524605267856,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010384366250946186,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3377.0,
      "completions/mean_length": 918.5803833007812,
      "completions/mean_terminated_length": 519.4070434570312,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 7.1772594752186585,
      "grad_norm": 0.22931401431560516,
      "learning_rate": 1e-06,
      "loss": -0.0156,
      "num_tokens": 435671702.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.1274154782295227,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 768
    },
    {
      "clip_ratio/high_max": 0.0018453910488460679,
      "clip_ratio/high_mean": 0.0006329586622086936,
      "clip_ratio/low_mean": 0.0005600428394245682,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011930015025427565,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2554.0,
      "completions/mean_length": 822.0335083007812,
      "completions/mean_terminated_length": 553.156982421875,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 7.186588921282799,
      "grad_norm": 0.6465548276901245,
      "learning_rate": 1e-06,
      "loss": -0.0542,
      "num_tokens": 436217756.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.15398016571998596,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 769
    },
    {
      "clip_ratio/high_max": 0.0020515040305326693,
      "clip_ratio/high_mean": 0.0007472602846974041,
      "clip_ratio/low_mean": 0.0003975188401454943,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011447791184764355,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2206.0,
      "completions/mean_length": 897.200927734375,
      "completions/mean_terminated_length": 535.5975341796875,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 7.1959183673469385,
      "grad_norm": 0.2725692093372345,
      "learning_rate": 1e-06,
      "loss": -0.0464,
      "num_tokens": 436741432.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.14414341747760773,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 770
    },
    {
      "clip_ratio/high_max": 0.0015428995538968593,
      "clip_ratio/high_mean": 0.0005118942772242008,
      "clip_ratio/low_mean": 0.0004454997917946457,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009573940587870311,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4059.0,
      "completions/mean_length": 975.9464721679688,
      "completions/mean_terminated_length": 583.9799194335938,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 7.205247813411079,
      "grad_norm": 0.18831664323806763,
      "learning_rate": 1e-06,
      "loss": -0.0084,
      "num_tokens": 437291712.0,
      "reward": 0.59375,
      "reward_std": 0.1335471272468567,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 771
    },
    {
      "clip_ratio/high_max": 0.0021033159573562443,
      "clip_ratio/high_mean": 0.0007534949218097609,
      "clip_ratio/low_mean": 0.00028411590278665244,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010376108239142923,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3738.0,
      "completions/mean_length": 931.1317138671875,
      "completions/mean_terminated_length": 529.0540771484375,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 7.214577259475218,
      "grad_norm": 0.246206596493721,
      "learning_rate": 1e-06,
      "loss": -0.0473,
      "num_tokens": 437805702.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.13696163892745972,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 772
    },
    {
      "clip_ratio/high_max": 0.002066510023723822,
      "clip_ratio/high_mean": 0.0007286792097147554,
      "clip_ratio/low_mean": 0.0004168878567725187,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011455670464783907,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1846.0,
      "completions/mean_length": 883.2199096679688,
      "completions/mean_terminated_length": 479.6042785644531,
      "completions/min_length": 187.0,
      "completions/min_terminated_length": 187.0,
      "epoch": 7.223906705539359,
      "grad_norm": 0.2433871030807495,
      "learning_rate": 1e-06,
      "loss": -0.0467,
      "num_tokens": 438282347.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.15341675281524658,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.48841196298599243,
      "step": 773
    },
    {
      "clip_ratio/high_max": 0.001539781667815987,
      "clip_ratio/high_mean": 0.0006261663638724713,
      "clip_ratio/low_mean": 0.0004075622259733791,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010337285821151454,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3905.0,
      "completions/mean_length": 826.3995971679688,
      "completions/mean_terminated_length": 557.8816528320312,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 7.233236151603498,
      "grad_norm": 0.5587377548217773,
      "learning_rate": 1e-06,
      "loss": -0.027,
      "num_tokens": 438836089.0,
      "reward": 0.613839328289032,
      "reward_std": 0.14560766518115997,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 774
    },
    {
      "clip_ratio/high_max": 0.001900513940199744,
      "clip_ratio/high_mean": 0.0005801326587970834,
      "clip_ratio/low_mean": 0.0002264898992052622,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008066225582297193,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3882.0,
      "completions/mean_length": 881.1138916015625,
      "completions/mean_terminated_length": 578.859619140625,
      "completions/min_length": 188.0,
      "completions/min_terminated_length": 188.0,
      "epoch": 7.242565597667639,
      "grad_norm": 0.19209402799606323,
      "learning_rate": 1e-06,
      "loss": -0.0427,
      "num_tokens": 439401463.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.12181740999221802,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 775
    },
    {
      "clip_ratio/high_max": 0.0018584589997772127,
      "clip_ratio/high_mean": 0.0005739138869103044,
      "clip_ratio/low_mean": 0.0003227495324154006,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008966634013631847,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2293.0,
      "completions/mean_length": 912.6004638671875,
      "completions/mean_terminated_length": 565.8935546875,
      "completions/min_length": 187.0,
      "completions/min_terminated_length": 187.0,
      "epoch": 7.251895043731778,
      "grad_norm": 0.18052180111408234,
      "learning_rate": 1e-06,
      "loss": -0.034,
      "num_tokens": 439944689.0,
      "reward": 0.6328125,
      "reward_std": 0.12155778706073761,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 776
    },
    {
      "clip_ratio/high_max": 0.0018825598635885399,
      "clip_ratio/high_mean": 0.0007097283178154612,
      "clip_ratio/low_mean": 0.0003853289053949993,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010950572395813651,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2795.0,
      "completions/mean_length": 806.2221069335938,
      "completions/mean_terminated_length": 531.742431640625,
      "completions/min_length": 192.0,
      "completions/min_terminated_length": 192.0,
      "epoch": 7.261224489795918,
      "grad_norm": 0.19756871461868286,
      "learning_rate": 1e-06,
      "loss": -0.0236,
      "num_tokens": 440466704.0,
      "reward": 0.7220982313156128,
      "reward_std": 0.13707223534584045,
      "rewards/verify_math_reward/mean": 0.7220982313156128,
      "rewards/verify_math_reward/std": 0.44821491837501526,
      "step": 777
    },
    {
      "clip_ratio/high_max": 0.0020003035642730538,
      "clip_ratio/high_mean": 0.0007616958573635202,
      "clip_ratio/low_mean": 0.0003803055578828207,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011420014379837085,
      "completions/clipped_ratio": 0.0725446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2329.0,
      "completions/mean_length": 822.8292846679688,
      "completions/mean_terminated_length": 566.8050537109375,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 7.270553935860058,
      "grad_norm": 0.19197532534599304,
      "learning_rate": 1e-06,
      "loss": -0.0256,
      "num_tokens": 441023191.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.144252210855484,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807061672210693,
      "step": 778
    },
    {
      "clip_ratio/high_max": 0.0019053590112889651,
      "clip_ratio/high_mean": 0.0007509957613365259,
      "clip_ratio/low_mean": 0.0005742122648371151,
      "clip_ratio/low_min": 2.489048165443819e-05,
      "clip_ratio/region_mean": 0.0013252080025267787,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3273.0,
      "completions/mean_length": 1045.7098388671875,
      "completions/mean_terminated_length": 605.5018920898438,
      "completions/min_length": 193.0,
      "completions/min_terminated_length": 193.0,
      "epoch": 7.279883381924198,
      "grad_norm": 0.2804083526134491,
      "learning_rate": 1e-06,
      "loss": -0.0394,
      "num_tokens": 441589835.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.18134015798568726,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 779
    },
    {
      "clip_ratio/high_max": 0.002064025196887087,
      "clip_ratio/high_mean": 0.000743654773941671,
      "clip_ratio/low_mean": 0.00040148734206013614,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001145142101449892,
      "completions/clipped_ratio": 0.0725446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4026.0,
      "completions/mean_length": 797.536865234375,
      "completions/mean_terminated_length": 539.5343017578125,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 7.289212827988338,
      "grad_norm": 0.22385987639427185,
      "learning_rate": 1e-06,
      "loss": -0.0288,
      "num_tokens": 442126412.0,
      "reward": 0.652901828289032,
      "reward_std": 0.13655048608779907,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631320357322693,
      "step": 780
    },
    {
      "clip_ratio/high_max": 0.0018116132778231986,
      "clip_ratio/high_mean": 0.0006781190586480079,
      "clip_ratio/low_mean": 0.00033917441101039003,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001017293468976277,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2908.0,
      "completions/mean_length": 803.5067138671875,
      "completions/mean_terminated_length": 528.8004760742188,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 7.298542274052478,
      "grad_norm": 0.2284812480211258,
      "learning_rate": 1e-06,
      "loss": -0.0501,
      "num_tokens": 442646034.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.12892432510852814,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 781
    },
    {
      "clip_ratio/high_max": 0.0020614859968191013,
      "clip_ratio/high_mean": 0.0007707177501288243,
      "clip_ratio/low_mean": 0.00039828915396356024,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011690069222822785,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3631.0,
      "completions/mean_length": 1049.9263916015625,
      "completions/mean_terminated_length": 578.884033203125,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 7.307871720116618,
      "grad_norm": 0.23519906401634216,
      "learning_rate": 1e-06,
      "loss": -0.0567,
      "num_tokens": 443188656.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.15680059790611267,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924396276473999,
      "step": 782
    },
    {
      "clip_ratio/high_max": 0.0014961940578359645,
      "clip_ratio/high_mean": 0.0004797290375790908,
      "clip_ratio/low_mean": 0.0005081013705421356,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009878304026642581,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2558.0,
      "completions/mean_length": 857.5714721679688,
      "completions/mean_terminated_length": 548.7726440429688,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 7.317201166180758,
      "grad_norm": 0.20529448986053467,
      "learning_rate": 1e-06,
      "loss": -0.036,
      "num_tokens": 443722896.0,
      "reward": 0.668526828289032,
      "reward_std": 0.12207955121994019,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056483745575,
      "step": 783
    },
    {
      "clip_ratio/high_max": 0.002317278296686709,
      "clip_ratio/high_mean": 0.0008041735609367606,
      "clip_ratio/low_mean": 0.0005017438325012336,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013059173616056796,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3539.0,
      "completions/mean_length": 872.6105346679688,
      "completions/mean_terminated_length": 525.9666137695312,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 7.326530612244898,
      "grad_norm": 0.21070453524589539,
      "learning_rate": 1e-06,
      "loss": -0.0538,
      "num_tokens": 444238347.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.14222341775894165,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 784
    },
    {
      "clip_ratio/high_max": 0.0013067056206637062,
      "clip_ratio/high_mean": 0.0004163580815657042,
      "clip_ratio/low_mean": 0.00034855551939472207,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007649136241525412,
      "completions/clipped_ratio": 0.0636160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2023.0,
      "completions/mean_length": 766.2433471679688,
      "completions/mean_terminated_length": 540.0262451171875,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 7.335860058309038,
      "grad_norm": 0.5768417119979858,
      "learning_rate": 1e-06,
      "loss": -0.0227,
      "num_tokens": 444773069.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.115808866918087,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219157218933105,
      "step": 785
    },
    {
      "clip_ratio/high_max": 0.0016020918628782965,
      "clip_ratio/high_mean": 0.0005133051927259658,
      "clip_ratio/low_mean": 0.00039374745938403066,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009070526430150494,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2951.0,
      "completions/mean_length": 872.4241333007812,
      "completions/mean_terminated_length": 543.325927734375,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 7.345189504373177,
      "grad_norm": 0.21419286727905273,
      "learning_rate": 1e-06,
      "loss": -0.0395,
      "num_tokens": 445304777.0,
      "reward": 0.598214328289032,
      "reward_std": 0.12823893129825592,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 786
    },
    {
      "clip_ratio/high_max": 0.0018684573806240223,
      "clip_ratio/high_mean": 0.0007049983287288342,
      "clip_ratio/low_mean": 0.00036744167391589144,
      "clip_ratio/low_min": 2.0128823962295428e-05,
      "clip_ratio/region_mean": 0.0010724400308390614,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3301.0,
      "completions/mean_length": 938.7600708007812,
      "completions/mean_terminated_length": 528.6771850585938,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 7.354518950437318,
      "grad_norm": 0.22720560431480408,
      "learning_rate": 1e-06,
      "loss": -0.0478,
      "num_tokens": 445804274.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.13316552340984344,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 787
    },
    {
      "clip_ratio/high_max": 0.0017978480973397382,
      "clip_ratio/high_mean": 0.0006723729893565178,
      "clip_ratio/low_mean": 0.0003866412009756459,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010590141791908536,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3350.0,
      "completions/mean_length": 880.50341796875,
      "completions/mean_terminated_length": 582.481689453125,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 7.363848396501457,
      "grad_norm": 0.21505652368068695,
      "learning_rate": 1e-06,
      "loss": -0.0422,
      "num_tokens": 446377477.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.15067441761493683,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 788
    },
    {
      "clip_ratio/high_max": 0.0020738798266393133,
      "clip_ratio/high_mean": 0.0007104585038177902,
      "clip_ratio/low_mean": 0.0003293472329914948,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010398057293059537,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3430.0,
      "completions/mean_length": 868.763427734375,
      "completions/mean_terminated_length": 539.29150390625,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 7.373177842565598,
      "grad_norm": 0.41033071279525757,
      "learning_rate": 1e-06,
      "loss": -0.0318,
      "num_tokens": 446901761.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.13361060619354248,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 789
    },
    {
      "clip_ratio/high_max": 0.001817087919334881,
      "clip_ratio/high_mean": 0.0007647078982699895,
      "clip_ratio/low_mean": 0.00035661983656609664,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011213277393835597,
      "completions/clipped_ratio": 0.0535714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3829.0,
      "completions/mean_length": 720.2645263671875,
      "completions/mean_terminated_length": 529.1851196289062,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 7.382507288629737,
      "grad_norm": 0.21566364169120789,
      "learning_rate": 1e-06,
      "loss": -0.0255,
      "num_tokens": 447440510.0,
      "reward": 0.7020089626312256,
      "reward_std": 0.15439385175704956,
      "rewards/verify_math_reward/mean": 0.7020089030265808,
      "rewards/verify_math_reward/std": 0.45763099193573,
      "step": 790
    },
    {
      "clip_ratio/high_max": 0.0015683761339460034,
      "clip_ratio/high_mean": 0.0005569472532442887,
      "clip_ratio/low_mean": 0.00030527737999364035,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008622246423328761,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3516.0,
      "completions/mean_length": 776.7299194335938,
      "completions/mean_terminated_length": 529.9736328125,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 7.391836734693878,
      "grad_norm": 0.18326812982559204,
      "learning_rate": 1e-06,
      "loss": -0.0338,
      "num_tokens": 447973084.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.11283689737319946,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613667368888855,
      "step": 791
    },
    {
      "clip_ratio/high_max": 0.002036717880400829,
      "clip_ratio/high_mean": 0.0006804739823564887,
      "clip_ratio/low_mean": 0.0003834617098164017,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010639356914907694,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1847.0,
      "completions/mean_length": 742.9263916015625,
      "completions/mean_terminated_length": 493.6570739746094,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 7.401166180758017,
      "grad_norm": 0.26743683218955994,
      "learning_rate": 1e-06,
      "loss": -0.0163,
      "num_tokens": 448482466.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.1305733323097229,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 792
    },
    {
      "clip_ratio/high_max": 0.0018211142196378205,
      "clip_ratio/high_mean": 0.0006659587324975291,
      "clip_ratio/low_mean": 0.00044132539369456936,
      "clip_ratio/low_min": 1.855149821494706e-05,
      "clip_ratio/region_mean": 0.0011072841407440137,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2224.0,
      "completions/mean_length": 843.2824096679688,
      "completions/mean_terminated_length": 520.006103515625,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 7.410495626822158,
      "grad_norm": 0.2430814653635025,
      "learning_rate": 1e-06,
      "loss": -0.0327,
      "num_tokens": 448995319.0,
      "reward": 0.7064732313156128,
      "reward_std": 0.1288815438747406,
      "rewards/verify_math_reward/mean": 0.7064732313156128,
      "rewards/verify_math_reward/std": 0.4556320011615753,
      "step": 793
    },
    {
      "clip_ratio/high_max": 0.00199321500986116,
      "clip_ratio/high_mean": 0.0007223097145470092,
      "clip_ratio/low_mean": 0.00025829350238382176,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009806032066990156,
      "completions/clipped_ratio": 0.0580357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3297.0,
      "completions/mean_length": 763.2600708007812,
      "completions/mean_terminated_length": 557.9253540039062,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 7.419825072886297,
      "grad_norm": 0.19316665828227997,
      "learning_rate": 1e-06,
      "loss": -0.0393,
      "num_tokens": 449552056.0,
      "reward": 0.7064732313156128,
      "reward_std": 0.13301467895507812,
      "rewards/verify_math_reward/mean": 0.7064732313156128,
      "rewards/verify_math_reward/std": 0.4556320011615753,
      "step": 794
    },
    {
      "clip_ratio/high_max": 0.0015548774172202684,
      "clip_ratio/high_mean": 0.0005065612785983831,
      "clip_ratio/low_mean": 0.00033220593707028456,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008387672187382123,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3267.0,
      "completions/mean_length": 790.0792846679688,
      "completions/mean_terminated_length": 527.1987915039062,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 7.429154518950437,
      "grad_norm": 0.25985169410705566,
      "learning_rate": 1e-06,
      "loss": -0.0396,
      "num_tokens": 450069903.0,
      "reward": 0.7087053656578064,
      "reward_std": 0.11532352864742279,
      "rewards/verify_math_reward/mean": 0.7087053656578064,
      "rewards/verify_math_reward/std": 0.45461276173591614,
      "step": 795
    },
    {
      "clip_ratio/high_max": 0.001736435733619146,
      "clip_ratio/high_mean": 0.0006576968880835921,
      "clip_ratio/low_mean": 0.0004766974152516923,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001134394304244779,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3986.0,
      "completions/mean_length": 1004.1239013671875,
      "completions/mean_terminated_length": 598.1199340820312,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 7.438483965014577,
      "grad_norm": 0.21131226420402527,
      "learning_rate": 1e-06,
      "loss": -0.0365,
      "num_tokens": 450643230.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.1590908169746399,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796950817108154,
      "step": 796
    },
    {
      "clip_ratio/high_max": 0.0021376903823693283,
      "clip_ratio/high_mean": 0.0009354207468277309,
      "clip_ratio/low_mean": 0.0005537491324503208,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001489169902924914,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2331.0,
      "completions/mean_length": 889.2578735351562,
      "completions/mean_terminated_length": 574.871337890625,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 7.447813411078717,
      "grad_norm": 0.2466975450515747,
      "learning_rate": 1e-06,
      "loss": -0.0379,
      "num_tokens": 451199261.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.1874253898859024,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 797
    },
    {
      "clip_ratio/high_max": 0.0018289342988282442,
      "clip_ratio/high_mean": 0.0007239878341351869,
      "clip_ratio/low_mean": 0.0005331186221155804,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012571064726216719,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2967.0,
      "completions/mean_length": 897.7444458007812,
      "completions/mean_terminated_length": 566.890380859375,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 7.457142857142857,
      "grad_norm": 0.2551487684249878,
      "learning_rate": 1e-06,
      "loss": -0.0424,
      "num_tokens": 451755744.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.15687981247901917,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 798
    },
    {
      "clip_ratio/high_max": 0.0015417725917359348,
      "clip_ratio/high_mean": 0.0005219379295340332,
      "clip_ratio/low_mean": 0.00035861645937984576,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008805543911876157,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3499.0,
      "completions/mean_length": 890.4297485351562,
      "completions/mean_terminated_length": 589.0513305664062,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 7.466472303206997,
      "grad_norm": 0.1948302984237671,
      "learning_rate": 1e-06,
      "loss": -0.0366,
      "num_tokens": 452329833.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.12982404232025146,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 799
    },
    {
      "clip_ratio/high_max": 0.0016610091115580872,
      "clip_ratio/high_mean": 0.0004783436525030993,
      "clip_ratio/low_mean": 0.0002772621162421274,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007556057662441162,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3150.0,
      "completions/mean_length": 820.6082763671875,
      "completions/mean_terminated_length": 551.61474609375,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 7.475801749271137,
      "grad_norm": 0.17610272765159607,
      "learning_rate": 1e-06,
      "loss": -0.0197,
      "num_tokens": 452877802.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.09923569113016129,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 800
    },
    {
      "clip_ratio/high_max": 0.0026171008648816496,
      "clip_ratio/high_mean": 0.0009410387829120737,
      "clip_ratio/low_mean": 0.00043235775774519425,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013733965461142361,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3584.0,
      "completions/mean_length": 925.06591796875,
      "completions/mean_terminated_length": 544.5537109375,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 7.485131195335277,
      "grad_norm": 0.25882914662361145,
      "learning_rate": 1e-06,
      "loss": -0.0623,
      "num_tokens": 453396245.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.17044779658317566,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 801
    },
    {
      "clip_ratio/high_max": 0.0019658309865917545,
      "clip_ratio/high_mean": 0.0007635818656126503,
      "clip_ratio/low_mean": 0.00036012548162034363,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011237073413212784,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2235.0,
      "completions/mean_length": 981.7266235351562,
      "completions/mean_terminated_length": 577.2244873046875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 7.494460641399417,
      "grad_norm": 0.20995590090751648,
      "learning_rate": 1e-06,
      "loss": -0.0636,
      "num_tokens": 453938864.0,
      "reward": 0.6328125,
      "reward_std": 0.1460520476102829,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 802
    },
    {
      "clip_ratio/high_max": 0.0014598001725971699,
      "clip_ratio/high_mean": 0.00042861368888225115,
      "clip_ratio/low_mean": 0.0003237422939719181,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007523559579567518,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3671.0,
      "completions/mean_length": 865.9721069335938,
      "completions/mean_terminated_length": 562.2943115234375,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 7.503790087463557,
      "grad_norm": 0.18857206404209137,
      "learning_rate": 1e-06,
      "loss": -0.0388,
      "num_tokens": 454494055.0,
      "reward": 0.6640625,
      "reward_std": 0.11599250137805939,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 803
    },
    {
      "clip_ratio/high_max": 0.0016289376617351081,
      "clip_ratio/high_mean": 0.0006079857830627589,
      "clip_ratio/low_mean": 0.0005249290843494236,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011329148619552143,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3022.0,
      "completions/mean_length": 866.6038208007812,
      "completions/mean_terminated_length": 528.1343994140625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 7.513119533527696,
      "grad_norm": 0.2475336641073227,
      "learning_rate": 1e-06,
      "loss": -0.0202,
      "num_tokens": 455014212.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.1577039510011673,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 804
    },
    {
      "clip_ratio/high_max": 0.0020908709229843225,
      "clip_ratio/high_mean": 0.0008554086743970402,
      "clip_ratio/low_mean": 0.0005937854139119736,
      "clip_ratio/low_min": 1.40860938699916e-05,
      "clip_ratio/region_mean": 0.0014491941037704237,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3846.0,
      "completions/mean_length": 871.6752319335938,
      "completions/mean_terminated_length": 546.8660888671875,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 7.522448979591837,
      "grad_norm": 0.22647230327129364,
      "learning_rate": 1e-06,
      "loss": -0.0596,
      "num_tokens": 455544513.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.19069157540798187,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 805
    },
    {
      "clip_ratio/high_max": 0.0018042797473754035,
      "clip_ratio/high_mean": 0.0006075592536944896,
      "clip_ratio/low_mean": 0.0003418043121428127,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000949363580730278,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2748.0,
      "completions/mean_length": 870.6473388671875,
      "completions/mean_terminated_length": 563.0953979492188,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 7.531778425655976,
      "grad_norm": 0.21548768877983093,
      "learning_rate": 1e-06,
      "loss": -0.0358,
      "num_tokens": 456084413.0,
      "reward": 0.668526828289032,
      "reward_std": 0.11956292390823364,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056483745575,
      "step": 806
    },
    {
      "clip_ratio/high_max": 0.002055928751360625,
      "clip_ratio/high_mean": 0.0005337901384336874,
      "clip_ratio/low_mean": 0.0005100326070532901,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001043822765495861,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3652.0,
      "completions/mean_length": 1036.1842041015625,
      "completions/mean_terminated_length": 558.4580688476562,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 7.541107871720117,
      "grad_norm": 0.21702860295772552,
      "learning_rate": 1e-06,
      "loss": -0.0457,
      "num_tokens": 456604858.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.1310279816389084,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 807
    },
    {
      "clip_ratio/high_max": 0.0015766515225550393,
      "clip_ratio/high_mean": 0.0005313608567121264,
      "clip_ratio/low_mean": 0.0003719614910551172,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009033223341248231,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3162.0,
      "completions/mean_length": 908.2489013671875,
      "completions/mean_terminated_length": 608.5458374023438,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 7.550437317784256,
      "grad_norm": 0.17865093052387238,
      "learning_rate": 1e-06,
      "loss": -0.029,
      "num_tokens": 457182649.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.12039663642644882,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 808
    },
    {
      "clip_ratio/high_max": 0.0019642376792035066,
      "clip_ratio/high_mean": 0.0008519805787727819,
      "clip_ratio/low_mean": 0.0005484221051119675,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014004026488692034,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2262.0,
      "completions/mean_length": 874.9219360351562,
      "completions/mean_terminated_length": 593.4684448242188,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 7.559766763848397,
      "grad_norm": 0.3716540038585663,
      "learning_rate": 1e-06,
      "loss": -0.041,
      "num_tokens": 457759491.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.1893431544303894,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 809
    },
    {
      "clip_ratio/high_max": 0.002034013294178294,
      "clip_ratio/high_mean": 0.0006746804865542799,
      "clip_ratio/low_mean": 0.00048705783683544723,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001161738320661243,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4040.0,
      "completions/mean_length": 879.8125610351562,
      "completions/mean_terminated_length": 533.943115234375,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 7.569096209912536,
      "grad_norm": 0.4034753739833832,
      "learning_rate": 1e-06,
      "loss": -0.0239,
      "num_tokens": 458293491.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.13496747612953186,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 810
    },
    {
      "clip_ratio/high_max": 0.001661057016463019,
      "clip_ratio/high_mean": 0.0006062400598239037,
      "clip_ratio/low_mean": 0.0005257374787106528,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011319775512674823,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2568.0,
      "completions/mean_length": 932.0167846679688,
      "completions/mean_terminated_length": 583.0768432617188,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 7.578425655976677,
      "grad_norm": 0.22804972529411316,
      "learning_rate": 1e-06,
      "loss": -0.0511,
      "num_tokens": 458849586.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.15026254951953888,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 811
    },
    {
      "clip_ratio/high_max": 0.0021547215183090884,
      "clip_ratio/high_mean": 0.0006679528969470994,
      "clip_ratio/low_mean": 0.00046255778045178886,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011305106745567173,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3086.0,
      "completions/mean_length": 890.825927734375,
      "completions/mean_terminated_length": 563.6063842773438,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 7.587755102040816,
      "grad_norm": 0.2476268708705902,
      "learning_rate": 1e-06,
      "loss": -0.0341,
      "num_tokens": 459400774.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.1620645970106125,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 812
    },
    {
      "clip_ratio/high_max": 0.0015425645779032493,
      "clip_ratio/high_mean": 0.0004173754109615402,
      "clip_ratio/low_mean": 0.0003525760248521692,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007699514299019938,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3272.0,
      "completions/mean_length": 917.65966796875,
      "completions/mean_terminated_length": 540.702880859375,
      "completions/min_length": 177.0,
      "completions/min_terminated_length": 177.0,
      "epoch": 7.597084548104956,
      "grad_norm": 0.19631144404411316,
      "learning_rate": 1e-06,
      "loss": -0.0411,
      "num_tokens": 459921333.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.11565662175416946,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 813
    },
    {
      "clip_ratio/high_max": 0.0020850097862421535,
      "clip_ratio/high_mean": 0.0006305483966571046,
      "clip_ratio/low_mean": 0.00039854229316915735,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001029090675729094,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1630.0,
      "completions/mean_length": 711.372802734375,
      "completions/mean_terminated_length": 477.11456298828125,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 7.606413994169096,
      "grad_norm": 0.20963580906391144,
      "learning_rate": 1e-06,
      "loss": -0.0137,
      "num_tokens": 460410331.0,
      "reward": 0.7444196939468384,
      "reward_std": 0.10870447009801865,
      "rewards/verify_math_reward/mean": 0.7444196343421936,
      "rewards/verify_math_reward/std": 0.43643057346343994,
      "step": 814
    },
    {
      "clip_ratio/high_max": 0.0014944998920327635,
      "clip_ratio/high_mean": 0.0004622150722752849,
      "clip_ratio/low_mean": 0.0004701450361608295,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009323601188953035,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2224.0,
      "completions/mean_length": 961.6875610351562,
      "completions/mean_terminated_length": 554.5825805664062,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 7.615743440233236,
      "grad_norm": 0.26445794105529785,
      "learning_rate": 1e-06,
      "loss": -0.0436,
      "num_tokens": 460935651.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.13121412694454193,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 815
    },
    {
      "clip_ratio/high_max": 0.0018435135825711768,
      "clip_ratio/high_mean": 0.0006467810808317154,
      "clip_ratio/low_mean": 0.0004172930639469996,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010640741493261885,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3279.0,
      "completions/mean_length": 885.1495971679688,
      "completions/mean_terminated_length": 561.69775390625,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 7.625072886297376,
      "grad_norm": 0.20902347564697266,
      "learning_rate": 1e-06,
      "loss": -0.0386,
      "num_tokens": 461479025.0,
      "reward": 0.6015625,
      "reward_std": 0.1519550383090973,
      "rewards/verify_math_reward/mean": 0.6015625,
      "rewards/verify_math_reward/std": 0.48984986543655396,
      "step": 816
    },
    {
      "clip_ratio/high_max": 0.001077953122148756,
      "clip_ratio/high_mean": 0.0003348792638462328,
      "clip_ratio/low_mean": 0.00040514794545742916,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007400272097584093,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3724.0,
      "completions/mean_length": 860.5000610351562,
      "completions/mean_terminated_length": 547.642578125,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 7.634402332361516,
      "grad_norm": 0.2130817174911499,
      "learning_rate": 1e-06,
      "loss": -0.0317,
      "num_tokens": 462012081.0,
      "reward": 0.629464328289032,
      "reward_std": 0.11393164098262787,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 817
    },
    {
      "clip_ratio/high_max": 0.0017106234954553656,
      "clip_ratio/high_mean": 0.0006426229138014605,
      "clip_ratio/low_mean": 0.00048620864708937006,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001128831565438304,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3523.0,
      "completions/mean_length": 884.247802734375,
      "completions/mean_terminated_length": 551.9974975585938,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 7.643731778425656,
      "grad_norm": 0.22409754991531372,
      "learning_rate": 1e-06,
      "loss": -0.0487,
      "num_tokens": 462551719.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.1380895972251892,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 818
    },
    {
      "clip_ratio/high_max": 0.002189966689911671,
      "clip_ratio/high_mean": 0.0008116880489978939,
      "clip_ratio/low_mean": 0.0004735569018521346,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012852449472120497,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2802.0,
      "completions/mean_length": 813.2377319335938,
      "completions/mean_terminated_length": 581.8363037109375,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 7.653061224489796,
      "grad_norm": 0.3138972222805023,
      "learning_rate": 1e-06,
      "loss": -0.0353,
      "num_tokens": 463136284.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.1385025680065155,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 819
    },
    {
      "clip_ratio/high_max": 0.0021604646608466282,
      "clip_ratio/high_mean": 0.0007709093206358375,
      "clip_ratio/low_mean": 0.00033970701042562723,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011106163328804541,
      "completions/clipped_ratio": 0.0636160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3915.0,
      "completions/mean_length": 742.036865234375,
      "completions/mean_terminated_length": 514.1752319335938,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 7.662390670553936,
      "grad_norm": 0.32489922642707825,
      "learning_rate": 1e-06,
      "loss": -0.0367,
      "num_tokens": 463650245.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.13981597125530243,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613667368888855,
      "step": 820
    },
    {
      "clip_ratio/high_max": 0.0020404564202181064,
      "clip_ratio/high_mean": 0.0007672004612686578,
      "clip_ratio/low_mean": 0.0005154649734322447,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001282665405597072,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3377.0,
      "completions/mean_length": 891.9342041015625,
      "completions/mean_terminated_length": 569.1658325195312,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 7.671720116618076,
      "grad_norm": 0.21399322152137756,
      "learning_rate": 1e-06,
      "loss": -0.0578,
      "num_tokens": 464192634.0,
      "reward": 0.637276828289032,
      "reward_std": 0.16702328622341156,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 821
    },
    {
      "clip_ratio/high_max": 0.0018740105551842134,
      "clip_ratio/high_mean": 0.0006350987941914354,
      "clip_ratio/low_mean": 0.0006803349815527326,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013154337611922529,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3107.0,
      "completions/mean_length": 895.1495971679688,
      "completions/mean_terminated_length": 550.9295654296875,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 7.681049562682215,
      "grad_norm": 0.2542259097099304,
      "learning_rate": 1e-06,
      "loss": -0.0367,
      "num_tokens": 464734632.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.13294051587581635,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 822
    },
    {
      "clip_ratio/high_max": 0.0019385783598409034,
      "clip_ratio/high_mean": 0.0007336593480431475,
      "clip_ratio/low_mean": 0.0005824763429700397,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013161357164790388,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3606.0,
      "completions/mean_length": 876.8225708007812,
      "completions/mean_terminated_length": 512.91552734375,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 7.690379008746356,
      "grad_norm": 0.2604391574859619,
      "learning_rate": 1e-06,
      "loss": -0.0359,
      "num_tokens": 465233641.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.1440257877111435,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 823
    },
    {
      "clip_ratio/high_max": 0.0018905918987002224,
      "clip_ratio/high_mean": 0.0006724767918058205,
      "clip_ratio/low_mean": 0.00041785722532949876,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001090334051696118,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1778.0,
      "completions/mean_length": 899.8292846679688,
      "completions/mean_terminated_length": 534.0982666015625,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 7.699708454810495,
      "grad_norm": 0.25682181119918823,
      "learning_rate": 1e-06,
      "loss": -0.0169,
      "num_tokens": 465750000.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.1342499852180481,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 824
    },
    {
      "clip_ratio/high_max": 0.002026215923251584,
      "clip_ratio/high_mean": 0.0008233674543589586,
      "clip_ratio/low_mean": 0.0006635132303927094,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014868807265884243,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3730.0,
      "completions/mean_length": 883.513427734375,
      "completions/mean_terminated_length": 568.563720703125,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 7.709037900874636,
      "grad_norm": 0.24753423035144806,
      "learning_rate": 1e-06,
      "loss": -0.045,
      "num_tokens": 466308996.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.16386334598064423,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159480571747,
      "step": 825
    },
    {
      "clip_ratio/high_max": 0.001801125647034496,
      "clip_ratio/high_mean": 0.0006470480766438413,
      "clip_ratio/low_mean": 0.00027894574031961383,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009259937905881088,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3068.0,
      "completions/mean_length": 792.7745971679688,
      "completions/mean_terminated_length": 538.6802978515625,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 7.718367346938775,
      "grad_norm": 0.20461122691631317,
      "learning_rate": 1e-06,
      "loss": -0.0323,
      "num_tokens": 466851290.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.11592015624046326,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 826
    },
    {
      "clip_ratio/high_max": 0.0016967389274213929,
      "clip_ratio/high_mean": 0.0006368712183757452,
      "clip_ratio/low_mean": 0.00031680445749771025,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009536756588204298,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2728.0,
      "completions/mean_length": 780.2801513671875,
      "completions/mean_terminated_length": 525.2247924804688,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 7.727696793002916,
      "grad_norm": 0.275640606880188,
      "learning_rate": 1e-06,
      "loss": -0.0385,
      "num_tokens": 467383877.0,
      "reward": 0.6595982313156128,
      "reward_std": 0.13557478785514832,
      "rewards/verify_math_reward/mean": 0.6595982313156128,
      "rewards/verify_math_reward/std": 0.4741089344024658,
      "step": 827
    },
    {
      "clip_ratio/high_max": 0.0018999132298631594,
      "clip_ratio/high_mean": 0.00069778775832674,
      "clip_ratio/low_mean": 0.00032354430095438147,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010213320383627433,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4025.0,
      "completions/mean_length": 899.1105346679688,
      "completions/mean_terminated_length": 550.9343872070312,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 7.737026239067055,
      "grad_norm": 0.21681562066078186,
      "learning_rate": 1e-06,
      "loss": -0.0467,
      "num_tokens": 467921776.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.1300095021724701,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 828
    },
    {
      "clip_ratio/high_max": 0.0021351910691009834,
      "clip_ratio/high_mean": 0.0007330894768529106,
      "clip_ratio/low_mean": 0.0004994228811483481,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012325123716436792,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3009.0,
      "completions/mean_length": 853.185302734375,
      "completions/mean_terminated_length": 504.452392578125,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 7.746355685131196,
      "grad_norm": 0.2287919819355011,
      "learning_rate": 1e-06,
      "loss": -0.0492,
      "num_tokens": 468420022.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.1442507952451706,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219157218933105,
      "step": 829
    },
    {
      "clip_ratio/high_max": 0.0017939935605681967,
      "clip_ratio/high_mean": 0.0007073240094541688,
      "clip_ratio/low_mean": 0.000362676294571429,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001070000318577513,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1993.0,
      "completions/mean_length": 869.2031860351562,
      "completions/mean_terminated_length": 535.3965454101562,
      "completions/min_length": 185.0,
      "completions/min_terminated_length": 185.0,
      "epoch": 7.755685131195335,
      "grad_norm": 0.24683068692684174,
      "learning_rate": 1e-06,
      "loss": -0.0401,
      "num_tokens": 468947580.0,
      "reward": 0.645089328289032,
      "reward_std": 0.12925811111927032,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 830
    },
    {
      "clip_ratio/high_max": 0.00162812970665982,
      "clip_ratio/high_mean": 0.000563262915420637,
      "clip_ratio/low_mean": 0.00025965850636566756,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008229214272432728,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2041.0,
      "completions/mean_length": 989.7600708007812,
      "completions/mean_terminated_length": 546.011474609375,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 7.765014577259475,
      "grad_norm": 0.24280503392219543,
      "learning_rate": 1e-06,
      "loss": -0.0501,
      "num_tokens": 469462389.0,
      "reward": 0.65625,
      "reward_std": 0.1282082498073578,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 831
    },
    {
      "clip_ratio/high_max": 0.001739942243148107,
      "clip_ratio/high_mean": 0.0005105355357954977,
      "clip_ratio/low_mean": 0.0005892741050956829,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001099809625884518,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4087.0,
      "completions/mean_length": 944.52685546875,
      "completions/mean_terminated_length": 579.5367431640625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 7.774344023323615,
      "grad_norm": 0.22110724449157715,
      "learning_rate": 1e-06,
      "loss": -0.0225,
      "num_tokens": 470009301.0,
      "reward": 0.5546875,
      "reward_std": 0.11994129419326782,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 832
    },
    {
      "clip_ratio/high_max": 0.0019482617572066374,
      "clip_ratio/high_mean": 0.0007169645268731983,
      "clip_ratio/low_mean": 0.0006119113368185936,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013288758491398767,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1835.0,
      "completions/mean_length": 755.4375610351562,
      "completions/mean_terminated_length": 515.6842041015625,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 7.783673469387755,
      "grad_norm": 0.2862670123577118,
      "learning_rate": 1e-06,
      "loss": -0.0185,
      "num_tokens": 470528605.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.14804762601852417,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 833
    },
    {
      "clip_ratio/high_max": 0.0015325476088037249,
      "clip_ratio/high_mean": 0.0005396471333369846,
      "clip_ratio/low_mean": 0.00030326443584272056,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008429115623584948,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2519.0,
      "completions/mean_length": 737.5167846679688,
      "completions/mean_terminated_length": 517.8775024414062,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 7.793002915451895,
      "grad_norm": 0.17456020414829254,
      "learning_rate": 1e-06,
      "loss": -0.0191,
      "num_tokens": 471061236.0,
      "reward": 0.7008928656578064,
      "reward_std": 0.11757621169090271,
      "rewards/verify_math_reward/mean": 0.7008928656578064,
      "rewards/verify_math_reward/std": 0.458122581243515,
      "step": 834
    },
    {
      "clip_ratio/high_max": 0.0020878899449598975,
      "clip_ratio/high_mean": 0.0005363087943806022,
      "clip_ratio/low_mean": 0.00040220899290943635,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009385177891090279,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3485.0,
      "completions/mean_length": 904.0938110351562,
      "completions/mean_terminated_length": 556.4603881835938,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 7.802332361516035,
      "grad_norm": 0.1644892543554306,
      "learning_rate": 1e-06,
      "loss": -0.0389,
      "num_tokens": 471603496.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.09990689158439636,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644601345062,
      "step": 835
    },
    {
      "clip_ratio/high_max": 0.001960821384273004,
      "clip_ratio/high_mean": 0.0006039140553184552,
      "clip_ratio/low_mean": 0.0004193444408429059,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010232584754703566,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2880.0,
      "completions/mean_length": 874.4810791015625,
      "completions/mean_terminated_length": 523.6224975585938,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 7.811661807580175,
      "grad_norm": 0.20616614818572998,
      "learning_rate": 1e-06,
      "loss": -0.0411,
      "num_tokens": 472127255.0,
      "reward": 0.6484375,
      "reward_std": 0.11956290900707245,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 836
    },
    {
      "clip_ratio/high_max": 0.0017060250393114984,
      "clip_ratio/high_mean": 0.0006282697186179576,
      "clip_ratio/low_mean": 0.00040576628725830233,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010340360076952493,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4094.0,
      "completions/mean_length": 873.2221069335938,
      "completions/mean_terminated_length": 552.9214477539062,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 7.820991253644315,
      "grad_norm": 0.21669143438339233,
      "learning_rate": 1e-06,
      "loss": -0.0299,
      "num_tokens": 472663790.0,
      "reward": 0.65625,
      "reward_std": 0.1287720799446106,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 837
    },
    {
      "clip_ratio/high_max": 0.0015515964187216014,
      "clip_ratio/high_mean": 0.0006001234341965755,
      "clip_ratio/low_mean": 0.00032068414611785556,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009208075716742314,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3323.0,
      "completions/mean_length": 842.8527221679688,
      "completions/mean_terminated_length": 519.5337524414062,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 7.830320699708455,
      "grad_norm": 0.20231305062770844,
      "learning_rate": 1e-06,
      "loss": -0.0297,
      "num_tokens": 473174354.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.13083365559577942,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 838
    },
    {
      "clip_ratio/high_max": 0.0021013850200688466,
      "clip_ratio/high_mean": 0.0007318230218515964,
      "clip_ratio/low_mean": 0.0004379845299808949,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011698075522872387,
      "completions/clipped_ratio": 0.0535714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1890.0,
      "completions/mean_length": 706.5067138671875,
      "completions/mean_terminated_length": 514.6486206054688,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 7.839650145772595,
      "grad_norm": 0.20463016629219055,
      "learning_rate": 1e-06,
      "loss": -0.046,
      "num_tokens": 473706056.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.13485799729824066,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 839
    },
    {
      "clip_ratio/high_max": 0.0016366375748475548,
      "clip_ratio/high_mean": 0.000635799436167872,
      "clip_ratio/low_mean": 0.00037692594651161926,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001012725408145343,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3316.0,
      "completions/mean_length": 839.6942138671875,
      "completions/mean_terminated_length": 533.5457763671875,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 7.848979591836734,
      "grad_norm": 0.21159310638904572,
      "learning_rate": 1e-06,
      "loss": -0.0604,
      "num_tokens": 474227910.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.1457924246788025,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111123085022,
      "step": 840
    },
    {
      "clip_ratio/high_max": 0.0020121899869991466,
      "clip_ratio/high_mean": 0.0008275412565126317,
      "clip_ratio/low_mean": 0.00043838469673573854,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001265925955522107,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3754.0,
      "completions/mean_length": 757.0011596679688,
      "completions/mean_terminated_length": 491.4903564453125,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 7.858309037900875,
      "grad_norm": 0.2540360689163208,
      "learning_rate": 1e-06,
      "loss": -0.0622,
      "num_tokens": 474733247.0,
      "reward": 0.691964328289032,
      "reward_std": 0.1593957394361496,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 841
    },
    {
      "clip_ratio/high_max": 0.0017584055021870881,
      "clip_ratio/high_mean": 0.0006672156773674942,
      "clip_ratio/low_mean": 0.000606639578791146,
      "clip_ratio/low_min": 2.9315197025425732e-05,
      "clip_ratio/region_mean": 0.0012738552031805739,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3006.0,
      "completions/mean_length": 876.3638916015625,
      "completions/mean_terminated_length": 569.3569946289062,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 7.867638483965014,
      "grad_norm": 0.2514052093029022,
      "learning_rate": 1e-06,
      "loss": -0.0432,
      "num_tokens": 475283549.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.1576707363128662,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 842
    },
    {
      "clip_ratio/high_max": 0.0016686324415786657,
      "clip_ratio/high_mean": 0.0005509141410584562,
      "clip_ratio/low_mean": 0.00042700323547251173,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009779173833521781,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1800.0,
      "completions/mean_length": 800.6551513671875,
      "completions/mean_terminated_length": 490.8363952636719,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 7.876967930029155,
      "grad_norm": 0.28914740681648254,
      "learning_rate": 1e-06,
      "loss": -0.0257,
      "num_tokens": 475774712.0,
      "reward": 0.6484375,
      "reward_std": 0.1236218810081482,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 843
    },
    {
      "clip_ratio/high_max": 0.0016933851802605204,
      "clip_ratio/high_mean": 0.0006025929124007234,
      "clip_ratio/low_mean": 0.0005692411987183732,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001171834101114655,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1984.0,
      "completions/mean_length": 760.5558471679688,
      "completions/mean_terminated_length": 495.3277282714844,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 7.886297376093294,
      "grad_norm": 0.2057284116744995,
      "learning_rate": 1e-06,
      "loss": -0.0296,
      "num_tokens": 476268986.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.11768680810928345,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 844
    },
    {
      "clip_ratio/high_max": 0.0019498175824992359,
      "clip_ratio/high_mean": 0.0007854042469261913,
      "clip_ratio/low_mean": 0.0005245197789918166,
      "clip_ratio/low_min": 1.3676149137609173e-05,
      "clip_ratio/region_mean": 0.0013099240204610396,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2312.0,
      "completions/mean_length": 774.622802734375,
      "completions/mean_terminated_length": 536.2463989257812,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 7.895626822157435,
      "grad_norm": 0.2517111897468567,
      "learning_rate": 1e-06,
      "loss": -0.0272,
      "num_tokens": 476814640.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.17307278513908386,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199838399887085,
      "step": 845
    },
    {
      "clip_ratio/high_max": 0.0018377655778749613,
      "clip_ratio/high_mean": 0.0006429817076423205,
      "clip_ratio/low_mean": 0.00043769156400230713,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010806732534547336,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2761.0,
      "completions/mean_length": 818.9564819335938,
      "completions/mean_terminated_length": 519.5919799804688,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 7.904956268221574,
      "grad_norm": 0.2622203826904297,
      "learning_rate": 1e-06,
      "loss": -0.0288,
      "num_tokens": 477337889.0,
      "reward": 0.660714328289032,
      "reward_std": 0.13121342658996582,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 846
    },
    {
      "clip_ratio/high_max": 0.0020022262178827077,
      "clip_ratio/high_mean": 0.0007785756806697464,
      "clip_ratio/low_mean": 0.0007115808166417992,
      "clip_ratio/low_min": 3.995268889411818e-05,
      "clip_ratio/region_mean": 0.0014901565009495243,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3312.0,
      "completions/mean_length": 829.1027221679688,
      "completions/mean_terminated_length": 543.6456298828125,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 7.914285714285715,
      "grad_norm": 0.4514482021331787,
      "learning_rate": 1e-06,
      "loss": -0.023,
      "num_tokens": 477886685.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.17731650173664093,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 847
    },
    {
      "clip_ratio/high_max": 0.002184198223403655,
      "clip_ratio/high_mean": 0.0009001014686873532,
      "clip_ratio/low_mean": 0.00044511800069813034,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013452194798446726,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3173.0,
      "completions/mean_length": 863.5714721679688,
      "completions/mean_terminated_length": 551.0110473632812,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 7.923615160349854,
      "grad_norm": 0.5183972120285034,
      "learning_rate": 1e-06,
      "loss": -0.0683,
      "num_tokens": 478428389.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.15675964951515198,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159480571747,
      "step": 848
    },
    {
      "clip_ratio/high_max": 0.002004287212912459,
      "clip_ratio/high_mean": 0.0007069515486364253,
      "clip_ratio/low_mean": 0.0003856417215502006,
      "clip_ratio/low_min": 2.179218972742092e-05,
      "clip_ratio/region_mean": 0.0010925932874670252,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3662.0,
      "completions/mean_length": 847.6585083007812,
      "completions/mean_terminated_length": 542.2588500976562,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 7.932944606413994,
      "grad_norm": 0.19054283201694489,
      "learning_rate": 1e-06,
      "loss": -0.0416,
      "num_tokens": 478963731.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.1347392499446869,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 849
    },
    {
      "clip_ratio/high_max": 0.002129192725988105,
      "clip_ratio/high_mean": 0.0007958323694765568,
      "clip_ratio/low_mean": 0.0004401272362883901,
      "clip_ratio/low_min": 1.14009490062017e-05,
      "clip_ratio/region_mean": 0.0012359595966699999,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3765.0,
      "completions/mean_length": 967.4263916015625,
      "completions/mean_terminated_length": 569.959716796875,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 7.942274052478134,
      "grad_norm": 0.35868701338768005,
      "learning_rate": 1e-06,
      "loss": -0.0567,
      "num_tokens": 479511625.0,
      "reward": 0.613839328289032,
      "reward_std": 0.17479778826236725,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 850
    },
    {
      "clip_ratio/high_max": 0.0016355398001905996,
      "clip_ratio/high_mean": 0.0005937251335126348,
      "clip_ratio/low_mean": 0.00045768182644678745,
      "clip_ratio/low_min": 1.6322799638146535e-05,
      "clip_ratio/region_mean": 0.0010514069799683057,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3470.0,
      "completions/mean_length": 852.9609985351562,
      "completions/mean_terminated_length": 565.3037719726562,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 7.9516034985422746,
      "grad_norm": 0.2254210114479065,
      "learning_rate": 1e-06,
      "loss": -0.0368,
      "num_tokens": 480063022.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.15041157603263855,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.4846802353858948,
      "step": 851
    },
    {
      "clip_ratio/high_max": 0.0021271073710522614,
      "clip_ratio/high_mean": 0.0008197276983992197,
      "clip_ratio/low_mean": 0.0005710192399419611,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013907469765399583,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3977.0,
      "completions/mean_length": 958.8170166015625,
      "completions/mean_terminated_length": 582.35498046875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 7.960932944606414,
      "grad_norm": 0.31778720021247864,
      "learning_rate": 1e-06,
      "loss": -0.033,
      "num_tokens": 480617722.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.15417630970478058,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 852
    },
    {
      "clip_ratio/high_max": 0.0018951149613712914,
      "clip_ratio/high_mean": 0.0008180319509847322,
      "clip_ratio/low_mean": 0.0005928526206844253,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014108845680311788,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2612.0,
      "completions/mean_length": 859.3158569335938,
      "completions/mean_terminated_length": 555.01220703125,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 7.970262390670554,
      "grad_norm": 0.2911638915538788,
      "learning_rate": 1e-06,
      "loss": -0.0409,
      "num_tokens": 481153149.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.17836636304855347,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 853
    },
    {
      "clip_ratio/high_max": 0.0017786150056053884,
      "clip_ratio/high_mean": 0.0006056256024749018,
      "clip_ratio/low_mean": 0.00037553645870502805,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009811620620894246,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4072.0,
      "completions/mean_length": 867.638427734375,
      "completions/mean_terminated_length": 529.2774047851562,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 7.979591836734694,
      "grad_norm": 0.20771653950214386,
      "learning_rate": 1e-06,
      "loss": -0.0433,
      "num_tokens": 481667841.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.1194855347275734,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 854
    },
    {
      "clip_ratio/high_max": 0.0018286980030097766,
      "clip_ratio/high_mean": 0.000561557541914226,
      "clip_ratio/low_mean": 0.0004962916764270631,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010578492365311831,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3810.0,
      "completions/mean_length": 872.4342041015625,
      "completions/mean_terminated_length": 552.0552368164062,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 7.988921282798834,
      "grad_norm": 0.21716172993183136,
      "learning_rate": 1e-06,
      "loss": -0.0479,
      "num_tokens": 482218878.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.13639894127845764,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 855
    },
    {
      "clip_ratio/high_max": 0.0015286402122001164,
      "clip_ratio/high_mean": 0.0005184036726859631,
      "clip_ratio/low_mean": 0.00045618433841809747,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009745880397531437,
      "completions/clipped_ratio": 0.09659090909090906,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2015.0,
      "completions/mean_length": 954.9716186523438,
      "completions/mean_terminated_length": 619.1383666992188,
      "completions/min_length": 197.0,
      "completions/min_terminated_length": 197.0,
      "epoch": 7.998250728862974,
      "grad_norm": 0.6963316798210144,
      "learning_rate": 1e-06,
      "loss": -0.0157,
      "num_tokens": 482733549.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.1159183457493782,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 856
    },
    {
      "clip_ratio/high_max": 0.0017244667797058355,
      "clip_ratio/high_mean": 0.0005943860869592754,
      "clip_ratio/low_mean": 0.00042664310967666097,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010210292020929046,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2205.0,
      "completions/mean_length": 869.1629638671875,
      "completions/mean_terminated_length": 561.469482421875,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 8.00932944606414,
      "grad_norm": 15.352341651916504,
      "learning_rate": 1e-06,
      "loss": -0.0328,
      "num_tokens": 483287247.0,
      "reward": 0.629464328289032,
      "reward_std": 0.13714709877967834,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 857
    },
    {
      "clip_ratio/high_max": 0.001815902487578569,
      "clip_ratio/high_mean": 0.000629084113825229,
      "clip_ratio/low_mean": 0.0005884908350708429,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012175749398011249,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2103.0,
      "completions/mean_length": 833.4766235351562,
      "completions/mean_terminated_length": 526.7435913085938,
      "completions/min_length": 191.0,
      "completions/min_terminated_length": 191.0,
      "epoch": 8.018658892128279,
      "grad_norm": 0.33955374360084534,
      "learning_rate": 1e-06,
      "loss": -0.0171,
      "num_tokens": 483814506.0,
      "reward": 0.699776828289032,
      "reward_std": 0.13583439588546753,
      "rewards/verify_math_reward/mean": 0.6997767686843872,
      "rewards/verify_math_reward/std": 0.4586109220981598,
      "step": 858
    },
    {
      "clip_ratio/high_max": 0.0015145917486734106,
      "clip_ratio/high_mean": 0.0005256214504925083,
      "clip_ratio/low_mean": 0.0002786210413887602,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008042424833547557,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3935.0,
      "completions/mean_length": 957.14404296875,
      "completions/mean_terminated_length": 562.8153076171875,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 8.02798833819242,
      "grad_norm": 0.2032623589038849,
      "learning_rate": 1e-06,
      "loss": -0.045,
      "num_tokens": 484347099.0,
      "reward": 0.625,
      "reward_std": 0.09796436131000519,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 859
    },
    {
      "clip_ratio/high_max": 0.001714057958452031,
      "clip_ratio/high_mean": 0.000666359880597156,
      "clip_ratio/low_mean": 0.00044313512262306176,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011094949950347655,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3916.0,
      "completions/mean_length": 909.4699096679688,
      "completions/mean_terminated_length": 605.6198120117188,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 8.03731778425656,
      "grad_norm": 0.2106010764837265,
      "learning_rate": 1e-06,
      "loss": -0.0455,
      "num_tokens": 484923064.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.1527452915906906,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 860
    },
    {
      "clip_ratio/high_max": 0.0017888393049361184,
      "clip_ratio/high_mean": 0.00051904266456404,
      "clip_ratio/low_mean": 0.00037209191259535146,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00089113457397616,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3026.0,
      "completions/mean_length": 814.114990234375,
      "completions/mean_terminated_length": 514.3081665039062,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 8.0466472303207,
      "grad_norm": 0.22404490411281586,
      "learning_rate": 1e-06,
      "loss": -0.0477,
      "num_tokens": 485430359.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.11652494221925735,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692258834839,
      "step": 861
    },
    {
      "clip_ratio/high_max": 0.0017333504147245549,
      "clip_ratio/high_mean": 0.0006453966961998958,
      "clip_ratio/low_mean": 0.00027636869162961375,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009217654042004142,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4010.0,
      "completions/mean_length": 991.5469360351562,
      "completions/mean_terminated_length": 534.4251098632812,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 8.055976676384839,
      "grad_norm": 0.2290908694267273,
      "learning_rate": 1e-06,
      "loss": -0.058,
      "num_tokens": 485929177.0,
      "reward": 0.684151828289032,
      "reward_std": 0.1170462965965271,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 862
    },
    {
      "clip_ratio/high_max": 0.001612779811694054,
      "clip_ratio/high_mean": 0.0006354074148475775,
      "clip_ratio/low_mean": 0.0003885918279138423,
      "clip_ratio/low_min": 1.467480615247041e-05,
      "clip_ratio/region_mean": 0.0010239992552669719,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3949.0,
      "completions/mean_length": 916.3136596679688,
      "completions/mean_terminated_length": 548.0560302734375,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 8.06530612244898,
      "grad_norm": 0.21813301742076874,
      "learning_rate": 1e-06,
      "loss": -0.0322,
      "num_tokens": 486473602.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.1363978087902069,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644899368286,
      "step": 863
    },
    {
      "clip_ratio/high_max": 0.0016862124830367975,
      "clip_ratio/high_mean": 0.0006248824356589466,
      "clip_ratio/low_mean": 0.0002908805540755566,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009157629938272294,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3890.0,
      "completions/mean_length": 887.0569458007812,
      "completions/mean_terminated_length": 559.45263671875,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 8.07463556851312,
      "grad_norm": 0.41344738006591797,
      "learning_rate": 1e-06,
      "loss": -0.0673,
      "num_tokens": 487014325.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.12648296356201172,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 864
    },
    {
      "clip_ratio/high_max": 0.0019127090854453854,
      "clip_ratio/high_mean": 0.0006246632092370419,
      "clip_ratio/low_mean": 0.0006766343158233212,
      "clip_ratio/low_min": 1.775568125594873e-05,
      "clip_ratio/region_mean": 0.0013012975341553101,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2258.0,
      "completions/mean_length": 799.7299194335938,
      "completions/mean_terminated_length": 516.0509033203125,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 8.08396501457726,
      "grad_norm": 0.24503915011882782,
      "learning_rate": 1e-06,
      "loss": -0.019,
      "num_tokens": 487537491.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.13185282051563263,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 865
    },
    {
      "clip_ratio/high_max": 0.0016169263690244406,
      "clip_ratio/high_mean": 0.0005210784593145945,
      "clip_ratio/low_mean": 0.0003487113672235864,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008697898374521174,
      "completions/clipped_ratio": 0.0725446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3092.0,
      "completions/mean_length": 775.1886596679688,
      "completions/mean_terminated_length": 515.4380493164062,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 8.093294460641399,
      "grad_norm": 0.2012886106967926,
      "learning_rate": 1e-06,
      "loss": -0.0259,
      "num_tokens": 488051460.0,
      "reward": 0.6930803656578064,
      "reward_std": 0.1169707253575325,
      "rewards/verify_math_reward/mean": 0.6930803656578064,
      "rewards/verify_math_reward/std": 0.46147337555885315,
      "step": 866
    },
    {
      "clip_ratio/high_max": 0.0017281859545619227,
      "clip_ratio/high_mean": 0.000541291481567896,
      "clip_ratio/low_mean": 0.0003903100564457418,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009316015712101944,
      "completions/clipped_ratio": 0.1395089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4084.0,
      "completions/mean_length": 1076.1295166015625,
      "completions/mean_terminated_length": 586.526611328125,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 8.102623906705539,
      "grad_norm": 0.2492036670446396,
      "learning_rate": 1e-06,
      "loss": -0.0466,
      "num_tokens": 488584376.0,
      "reward": 0.59375,
      "reward_std": 0.13981598615646362,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 867
    },
    {
      "clip_ratio/high_max": 0.0022218978810997214,
      "clip_ratio/high_mean": 0.0008337921426573303,
      "clip_ratio/low_mean": 0.0004655011571230716,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012992933225177694,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2298.0,
      "completions/mean_length": 893.5781860351562,
      "completions/mean_terminated_length": 549.1890869140625,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 8.11195335276968,
      "grad_norm": 238.7201690673828,
      "learning_rate": 1e-06,
      "loss": -0.0326,
      "num_tokens": 489126470.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.1555984914302826,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 868
    },
    {
      "clip_ratio/high_max": 0.0020096220141567755,
      "clip_ratio/high_mean": 0.0007902960333012743,
      "clip_ratio/low_mean": 0.0004401411270009703,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012304371739446651,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3103.0,
      "completions/mean_length": 847.7745971679688,
      "completions/mean_terminated_length": 538.0415649414062,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 8.12128279883382,
      "grad_norm": 0.2761950194835663,
      "learning_rate": 1e-06,
      "loss": -0.0462,
      "num_tokens": 489648748.0,
      "reward": 0.6484375,
      "reward_std": 0.1641664206981659,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 869
    },
    {
      "clip_ratio/high_max": 0.0014475346979452297,
      "clip_ratio/high_mean": 0.000513871299517632,
      "clip_ratio/low_mean": 0.00033935469491552794,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008532259871572023,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3831.0,
      "completions/mean_length": 939.6763916015625,
      "completions/mean_terminated_length": 556.4931030273438,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 8.130612244897959,
      "grad_norm": 0.3890567719936371,
      "learning_rate": 1e-06,
      "loss": -0.0449,
      "num_tokens": 490178770.0,
      "reward": 0.606026828289032,
      "reward_std": 0.11276091635227203,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 870
    },
    {
      "clip_ratio/high_max": 0.0016950298231677152,
      "clip_ratio/high_mean": 0.0005651328647218179,
      "clip_ratio/low_mean": 0.0004706784930021968,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010358113868278451,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3064.0,
      "completions/mean_length": 843.036865234375,
      "completions/mean_terminated_length": 550.1909790039062,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 8.139941690962099,
      "grad_norm": 0.23404917120933533,
      "learning_rate": 1e-06,
      "loss": -0.007,
      "num_tokens": 490726339.0,
      "reward": 0.684151828289032,
      "reward_std": 0.12982404232025146,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 871
    },
    {
      "clip_ratio/high_max": 0.0016453908865514677,
      "clip_ratio/high_mean": 0.0006381078073900426,
      "clip_ratio/low_mean": 0.00038285072605503956,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010209585325355874,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3089.0,
      "completions/mean_length": 906.99560546875,
      "completions/mean_terminated_length": 537.6587524414062,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 8.14927113702624,
      "grad_norm": 0.2166660577058792,
      "learning_rate": 1e-06,
      "loss": -0.0469,
      "num_tokens": 491254479.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.14030200242996216,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 872
    },
    {
      "clip_ratio/high_max": 0.001982413032237673,
      "clip_ratio/high_mean": 0.0006984776791796321,
      "clip_ratio/low_mean": 0.0003224920515094709,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010209697193204192,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2797.0,
      "completions/mean_length": 839.3013916015625,
      "completions/mean_terminated_length": 537.4609985351562,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 8.15860058309038,
      "grad_norm": 0.2352529764175415,
      "learning_rate": 1e-06,
      "loss": -0.0565,
      "num_tokens": 491787909.0,
      "reward": 0.7276785969734192,
      "reward_std": 0.14609482884407043,
      "rewards/verify_math_reward/mean": 0.7276785969734192,
      "rewards/verify_math_reward/std": 0.4454030692577362,
      "step": 873
    },
    {
      "clip_ratio/high_max": 0.00204184370522853,
      "clip_ratio/high_mean": 0.000725960026102257,
      "clip_ratio/low_mean": 0.0004920503793073294,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001218010402226355,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3599.0,
      "completions/mean_length": 969.3047485351562,
      "completions/mean_terminated_length": 589.7183837890625,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 8.167930029154519,
      "grad_norm": 0.24664461612701416,
      "learning_rate": 1e-06,
      "loss": -0.0441,
      "num_tokens": 492351654.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.16499200463294983,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 874
    },
    {
      "clip_ratio/high_max": 0.0021251223297440447,
      "clip_ratio/high_mean": 0.000931073915126035,
      "clip_ratio/low_mean": 0.0005393568981162389,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014704307832289487,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3360.0,
      "completions/mean_length": 938.8035888671875,
      "completions/mean_terminated_length": 551.0776977539062,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 8.177259475218658,
      "grad_norm": 0.2628461420536041,
      "learning_rate": 1e-06,
      "loss": -0.0787,
      "num_tokens": 492871910.0,
      "reward": 0.637276828289032,
      "reward_std": 0.1826542466878891,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 875
    },
    {
      "clip_ratio/high_max": 0.0018862502183765173,
      "clip_ratio/high_mean": 0.0006135445682957652,
      "clip_ratio/low_mean": 0.0003818796196810581,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000995424214124796,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2622.0,
      "completions/mean_length": 826.864990234375,
      "completions/mean_terminated_length": 493.1156005859375,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 8.186588921282798,
      "grad_norm": 0.25860628485679626,
      "learning_rate": 1e-06,
      "loss": -0.0254,
      "num_tokens": 493365341.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.1401950567960739,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422144770622253,
      "step": 876
    },
    {
      "clip_ratio/high_max": 0.002142851193639217,
      "clip_ratio/high_mean": 0.0007946861342134071,
      "clip_ratio/low_mean": 0.00040247954029837274,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011971656967944,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3883.0,
      "completions/mean_length": 815.7678833007812,
      "completions/mean_terminated_length": 524.8117065429688,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 8.19591836734694,
      "grad_norm": 0.20338168740272522,
      "learning_rate": 1e-06,
      "loss": -0.0478,
      "num_tokens": 493896981.0,
      "reward": 0.6975446939468384,
      "reward_std": 0.138957217335701,
      "rewards/verify_math_reward/mean": 0.6975446343421936,
      "rewards/verify_math_reward/std": 0.45957788825035095,
      "step": 877
    },
    {
      "clip_ratio/high_max": 0.0017884866683743894,
      "clip_ratio/high_mean": 0.0006116581653259345,
      "clip_ratio/low_mean": 0.0003461850456005777,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009578432036505546,
      "completions/clipped_ratio": 0.1272321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3954.0,
      "completions/mean_length": 999.6339721679688,
      "completions/mean_terminated_length": 548.2455444335938,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 8.205247813411079,
      "grad_norm": 0.2069510817527771,
      "learning_rate": 1e-06,
      "loss": -0.0338,
      "num_tokens": 494419029.0,
      "reward": 0.637276828289032,
      "reward_std": 0.11761011183261871,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 878
    },
    {
      "clip_ratio/high_max": 0.0015876740035309922,
      "clip_ratio/high_mean": 0.0005098295314382995,
      "clip_ratio/low_mean": 0.00030483110958812176,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008146606378431898,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3362.0,
      "completions/mean_length": 887.3370971679688,
      "completions/mean_terminated_length": 551.0406494140625,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 8.214577259475218,
      "grad_norm": 0.2441258728504181,
      "learning_rate": 1e-06,
      "loss": -0.0145,
      "num_tokens": 494958299.0,
      "reward": 0.6595982313156128,
      "reward_std": 0.09296473115682602,
      "rewards/verify_math_reward/mean": 0.6595982313156128,
      "rewards/verify_math_reward/std": 0.4741089344024658,
      "step": 879
    },
    {
      "clip_ratio/high_max": 0.0018043315503746271,
      "clip_ratio/high_mean": 0.0005977879645797657,
      "clip_ratio/low_mean": 0.0004473864946703543,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001045174452883657,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4088.0,
      "completions/mean_length": 1023.6138916015625,
      "completions/mean_terminated_length": 606.953125,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 8.223906705539358,
      "grad_norm": 0.22276468575000763,
      "learning_rate": 1e-06,
      "loss": -0.035,
      "num_tokens": 495526337.0,
      "reward": 0.609375,
      "reward_std": 0.13872899115085602,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 880
    },
    {
      "clip_ratio/high_max": 0.002095451029163087,
      "clip_ratio/high_mean": 0.0007548647981820977,
      "clip_ratio/low_mean": 0.0005120580945003894,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012669228817685507,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2106.0,
      "completions/mean_length": 769.6663208007812,
      "completions/mean_terminated_length": 522.3848876953125,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 8.2332361516035,
      "grad_norm": 0.5053399801254272,
      "learning_rate": 1e-06,
      "loss": -0.0318,
      "num_tokens": 496057430.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.15033671259880066,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 881
    },
    {
      "clip_ratio/high_max": 0.0021112427639309317,
      "clip_ratio/high_mean": 0.0008087911592156161,
      "clip_ratio/low_mean": 0.00037163476304158394,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011804258938354906,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3899.0,
      "completions/mean_length": 937.7913208007812,
      "completions/mean_terminated_length": 541.0313720703125,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 8.242565597667639,
      "grad_norm": 0.22142240405082703,
      "learning_rate": 1e-06,
      "loss": -0.0599,
      "num_tokens": 496591603.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.1648743599653244,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 882
    },
    {
      "clip_ratio/high_max": 0.0015066460800881032,
      "clip_ratio/high_mean": 0.0005286981004246627,
      "clip_ratio/low_mean": 0.0001926557949900598,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007213538719952339,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3166.0,
      "completions/mean_length": 944.7020263671875,
      "completions/mean_terminated_length": 544.348388671875,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 8.251895043731778,
      "grad_norm": 0.17582453787326813,
      "learning_rate": 1e-06,
      "loss": -0.03,
      "num_tokens": 497114072.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.09622910618782043,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865824937820435,
      "step": 883
    },
    {
      "clip_ratio/high_max": 0.0020494664859143086,
      "clip_ratio/high_mean": 0.0006650339419138618,
      "clip_ratio/low_mean": 0.00041972394319600426,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010847578341781627,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3307.0,
      "completions/mean_length": 872.8348388671875,
      "completions/mean_terminated_length": 556.8382568359375,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 8.261224489795918,
      "grad_norm": 0.24951384961605072,
      "learning_rate": 1e-06,
      "loss": -0.0565,
      "num_tokens": 497651252.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.1560874581336975,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 884
    },
    {
      "clip_ratio/high_max": 0.0019271996352472343,
      "clip_ratio/high_mean": 0.0007097693446667108,
      "clip_ratio/low_mean": 0.0005364949629438343,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012462643389881123,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3198.0,
      "completions/mean_length": 932.59716796875,
      "completions/mean_terminated_length": 535.1846923828125,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 8.270553935860057,
      "grad_norm": 0.4013597369194031,
      "learning_rate": 1e-06,
      "loss": -0.0687,
      "num_tokens": 498174699.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.1720215380191803,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 885
    },
    {
      "clip_ratio/high_max": 0.0023901965796540026,
      "clip_ratio/high_mean": 0.0008991915856313426,
      "clip_ratio/low_mean": 0.00040031771254689374,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012995092693017796,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3876.0,
      "completions/mean_length": 1019.1406860351562,
      "completions/mean_terminated_length": 566.0819702148438,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 8.279883381924199,
      "grad_norm": 0.23164652287960052,
      "learning_rate": 1e-06,
      "loss": -0.0341,
      "num_tokens": 498707593.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.1468448042869568,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 886
    },
    {
      "clip_ratio/high_max": 0.0022972855367697775,
      "clip_ratio/high_mean": 0.0008382612049899762,
      "clip_ratio/low_mean": 0.0004574144759317278,
      "clip_ratio/low_min": 3.371089405845851e-05,
      "clip_ratio/region_mean": 0.0012956756545463577,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2977.0,
      "completions/mean_length": 838.8549194335938,
      "completions/mean_terminated_length": 536.97314453125,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 8.289212827988338,
      "grad_norm": 0.28572994470596313,
      "learning_rate": 1e-06,
      "loss": -0.0696,
      "num_tokens": 499232447.0,
      "reward": 0.6975446939468384,
      "reward_std": 0.16679435968399048,
      "rewards/verify_math_reward/mean": 0.6975446343421936,
      "rewards/verify_math_reward/std": 0.45957791805267334,
      "step": 887
    },
    {
      "clip_ratio/high_max": 0.0016594358821748756,
      "clip_ratio/high_mean": 0.000605644958341145,
      "clip_ratio/low_mean": 0.0003100996875673445,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009157446475001052,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3959.0,
      "completions/mean_length": 914.5491333007812,
      "completions/mean_terminated_length": 550.5025024414062,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 8.298542274052478,
      "grad_norm": 0.20085084438323975,
      "learning_rate": 1e-06,
      "loss": -0.0582,
      "num_tokens": 499765051.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.12414927780628204,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 888
    },
    {
      "clip_ratio/high_max": 0.0019504541869537206,
      "clip_ratio/high_mean": 0.0006448357589761144,
      "clip_ratio/low_mean": 0.00034611516866789316,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009909509235512814,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2123.0,
      "completions/mean_length": 894.7344360351562,
      "completions/mean_terminated_length": 515.0586547851562,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 8.307871720116617,
      "grad_norm": 0.20388734340667725,
      "learning_rate": 1e-06,
      "loss": -0.057,
      "num_tokens": 500264101.0,
      "reward": 0.7064732313156128,
      "reward_std": 0.1365479677915573,
      "rewards/verify_math_reward/mean": 0.7064732313156128,
      "rewards/verify_math_reward/std": 0.4556320011615753,
      "step": 889
    },
    {
      "clip_ratio/high_max": 0.0018714383641054155,
      "clip_ratio/high_mean": 0.0006585944420294254,
      "clip_ratio/low_mean": 0.0004814585936401272,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011400530456739943,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4079.0,
      "completions/mean_length": 944.4051513671875,
      "completions/mean_terminated_length": 548.4761352539062,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 8.317201166180759,
      "grad_norm": 0.22173303365707397,
      "learning_rate": 1e-06,
      "loss": -0.0317,
      "num_tokens": 500794280.0,
      "reward": 0.660714328289032,
      "reward_std": 0.11419377475976944,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 890
    },
    {
      "clip_ratio/high_max": 0.0020545174193102866,
      "clip_ratio/high_mean": 0.0006875173803564394,
      "clip_ratio/low_mean": 0.0004447611241857885,
      "clip_ratio/low_min": 9.802383829082828e-06,
      "clip_ratio/region_mean": 0.0011322784885123838,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4050.0,
      "completions/mean_length": 1030.0648193359375,
      "completions/mean_terminated_length": 569.5841064453125,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 8.326530612244898,
      "grad_norm": 0.2118057757616043,
      "learning_rate": 1e-06,
      "loss": -0.0397,
      "num_tokens": 501333930.0,
      "reward": 0.629464328289032,
      "reward_std": 0.14004239439964294,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 891
    },
    {
      "clip_ratio/high_max": 0.0016472196075483225,
      "clip_ratio/high_mean": 0.0005582850362770841,
      "clip_ratio/low_mean": 0.00046393736238314887,
      "clip_ratio/low_min": 1.8021914002019912e-05,
      "clip_ratio/region_mean": 0.0010222224009339698,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3394.0,
      "completions/mean_length": 864.5424194335938,
      "completions/mean_terminated_length": 525.85693359375,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 8.335860058309038,
      "grad_norm": 0.22242428362369537,
      "learning_rate": 1e-06,
      "loss": -0.0211,
      "num_tokens": 501848384.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.1345216929912567,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 892
    },
    {
      "clip_ratio/high_max": 0.001975229210074758,
      "clip_ratio/high_mean": 0.0006894999714859296,
      "clip_ratio/low_mean": 0.0004100064293197647,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010995063894370105,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2552.0,
      "completions/mean_length": 858.8270263671875,
      "completions/mean_terminated_length": 550.14794921875,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 8.345189504373177,
      "grad_norm": 0.25533241033554077,
      "learning_rate": 1e-06,
      "loss": -0.0423,
      "num_tokens": 502379541.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.13947898149490356,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 893
    },
    {
      "clip_ratio/high_max": 0.0020232624028722057,
      "clip_ratio/high_mean": 0.0006215312896529213,
      "clip_ratio/low_mean": 0.0004123197107901433,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010338510055589722,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3723.0,
      "completions/mean_length": 851.575927734375,
      "completions/mean_terminated_length": 515.94580078125,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 8.354518950437317,
      "grad_norm": 0.21104665100574493,
      "learning_rate": 1e-06,
      "loss": -0.0408,
      "num_tokens": 502891985.0,
      "reward": 0.621651828289032,
      "reward_std": 0.12482510507106781,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 894
    },
    {
      "clip_ratio/high_max": 0.002325372952327598,
      "clip_ratio/high_mean": 0.0008392198560613906,
      "clip_ratio/low_mean": 0.00047660489872214384,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013158247311366722,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3856.0,
      "completions/mean_length": 978.0256958007812,
      "completions/mean_terminated_length": 590.7239379882812,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 8.363848396501458,
      "grad_norm": 0.22268977761268616,
      "learning_rate": 1e-06,
      "loss": -0.0614,
      "num_tokens": 503446264.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.18321487307548523,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 895
    },
    {
      "clip_ratio/high_max": 0.0014690818279632367,
      "clip_ratio/high_mean": 0.000583587230721605,
      "clip_ratio/low_mean": 0.0002704414960135182,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008540287126379553,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2168.0,
      "completions/mean_length": 898.5011596679688,
      "completions/mean_terminated_length": 550.2586669921875,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 8.373177842565598,
      "grad_norm": 0.19972148537635803,
      "learning_rate": 1e-06,
      "loss": -0.034,
      "num_tokens": 503974257.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.11208761483430862,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 896
    },
    {
      "clip_ratio/high_max": 0.002112543603288941,
      "clip_ratio/high_mean": 0.0007068553941280697,
      "clip_ratio/low_mean": 0.00047607089641132916,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001182926273031626,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2223.0,
      "completions/mean_length": 834.950927734375,
      "completions/mean_terminated_length": 488.716064453125,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 8.382507288629737,
      "grad_norm": 0.21627932786941528,
      "learning_rate": 1e-06,
      "loss": -0.0355,
      "num_tokens": 504470837.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.14861243963241577,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 897
    },
    {
      "clip_ratio/high_max": 0.001663959090365097,
      "clip_ratio/high_mean": 0.0005463961751956958,
      "clip_ratio/low_mean": 0.0003685870410663483,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009149832330876961,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4071.0,
      "completions/mean_length": 970.8035888671875,
      "completions/mean_terminated_length": 546.98095703125,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 8.391836734693877,
      "grad_norm": 0.21270903944969177,
      "learning_rate": 1e-06,
      "loss": -0.0478,
      "num_tokens": 504979421.0,
      "reward": 0.684151828289032,
      "reward_std": 0.12294787913560867,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 898
    },
    {
      "clip_ratio/high_max": 0.002173752825910924,
      "clip_ratio/high_mean": 0.0009471107168792514,
      "clip_ratio/low_mean": 0.0005112791423016461,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014583898519049399,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3959.0,
      "completions/mean_length": 794.5469360351562,
      "completions/mean_terminated_length": 514.7627563476562,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 8.401166180758018,
      "grad_norm": 0.2854195237159729,
      "learning_rate": 1e-06,
      "loss": -0.0351,
      "num_tokens": 505496719.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.15169471502304077,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 899
    },
    {
      "clip_ratio/high_max": 0.002184569697419647,
      "clip_ratio/high_mean": 0.0009239231985702645,
      "clip_ratio/low_mean": 0.0003347971370430969,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012587203418661375,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1900.0,
      "completions/mean_length": 750.4832763671875,
      "completions/mean_terminated_length": 527.4488525390625,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 8.410495626822158,
      "grad_norm": 0.2775871157646179,
      "learning_rate": 1e-06,
      "loss": -0.0097,
      "num_tokens": 506020176.0,
      "reward": 0.723214328289032,
      "reward_std": 0.1670200526714325,
      "rewards/verify_math_reward/mean": 0.7232142686843872,
      "rewards/verify_math_reward/std": 0.44765952229499817,
      "step": 900
    },
    {
      "clip_ratio/high_max": 0.002224650677817408,
      "clip_ratio/high_mean": 0.0008865884065016871,
      "clip_ratio/low_mean": 0.0003944913078157697,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012810797088604886,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3087.0,
      "completions/mean_length": 878.513427734375,
      "completions/mean_terminated_length": 545.669921875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 8.419825072886297,
      "grad_norm": 2.284661293029785,
      "learning_rate": 1e-06,
      "loss": -0.0575,
      "num_tokens": 506546380.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.1646900177001953,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 901
    },
    {
      "clip_ratio/high_max": 0.001945417607203126,
      "clip_ratio/high_mean": 0.0006544553716594237,
      "clip_ratio/low_mean": 0.0003661346249828057,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010205899889115244,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2685.0,
      "completions/mean_length": 842.9967041015625,
      "completions/mean_terminated_length": 502.0530090332031,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 8.429154518950437,
      "grad_norm": 0.21726374328136444,
      "learning_rate": 1e-06,
      "loss": -0.0618,
      "num_tokens": 507034673.0,
      "reward": 0.7287946939468384,
      "reward_std": 0.13752618432044983,
      "rewards/verify_math_reward/mean": 0.7287946343421936,
      "rewards/verify_math_reward/std": 0.44483017921447754,
      "step": 902
    },
    {
      "clip_ratio/high_max": 0.0020464137851377018,
      "clip_ratio/high_mean": 0.0007443920094374334,
      "clip_ratio/low_mean": 0.0003264422643951548,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010708342997531872,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3798.0,
      "completions/mean_length": 941.388427734375,
      "completions/mean_terminated_length": 531.6469116210938,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 8.438483965014576,
      "grad_norm": 0.5861804485321045,
      "learning_rate": 1e-06,
      "loss": -0.0497,
      "num_tokens": 507543701.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.13854604959487915,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 903
    },
    {
      "clip_ratio/high_max": 0.0020555760729621397,
      "clip_ratio/high_mean": 0.0006632981621805811,
      "clip_ratio/low_mean": 0.0002628438535339228,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009261420582333812,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4090.0,
      "completions/mean_length": 945.7589721679688,
      "completions/mean_terminated_length": 580.9115600585938,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 8.447813411078718,
      "grad_norm": 0.21933381259441376,
      "learning_rate": 1e-06,
      "loss": -0.031,
      "num_tokens": 508106973.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.11888891458511353,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 904
    },
    {
      "clip_ratio/high_max": 0.002727092505665496,
      "clip_ratio/high_mean": 0.0008832131898088846,
      "clip_ratio/low_mean": 0.00044514592991617974,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013283591251820326,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2117.0,
      "completions/mean_length": 908.4420166015625,
      "completions/mean_terminated_length": 521.4668579101562,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 8.457142857142857,
      "grad_norm": 0.2372286021709442,
      "learning_rate": 1e-06,
      "loss": -0.0575,
      "num_tokens": 508619489.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.13755826652050018,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 905
    },
    {
      "clip_ratio/high_max": 0.0016336179396603256,
      "clip_ratio/high_mean": 0.0005315077278282843,
      "clip_ratio/low_mean": 0.00032338370465367916,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008548914356651949,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2829.0,
      "completions/mean_length": 890.54248046875,
      "completions/mean_terminated_length": 514.8403930664062,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 8.466472303206997,
      "grad_norm": 0.2048484832048416,
      "learning_rate": 1e-06,
      "loss": -0.028,
      "num_tokens": 509118311.0,
      "reward": 0.6975446939468384,
      "reward_std": 0.10788102447986603,
      "rewards/verify_math_reward/mean": 0.6975446343421936,
      "rewards/verify_math_reward/std": 0.45957788825035095,
      "step": 906
    },
    {
      "clip_ratio/high_max": 0.0014587766454496887,
      "clip_ratio/high_mean": 0.0005412365135271102,
      "clip_ratio/low_mean": 0.0004826612894248683,
      "clip_ratio/low_min": 1.4945002476451918e-05,
      "clip_ratio/region_mean": 0.0010238978393317666,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4069.0,
      "completions/mean_length": 885.6641235351562,
      "completions/mean_terminated_length": 549.1923217773438,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 8.475801749271136,
      "grad_norm": 0.2174452394247055,
      "learning_rate": 1e-06,
      "loss": -0.0302,
      "num_tokens": 509649154.0,
      "reward": 0.606026828289032,
      "reward_std": 0.1304224729537964,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 907
    },
    {
      "clip_ratio/high_max": 0.0020289599560783245,
      "clip_ratio/high_mean": 0.000711862368916627,
      "clip_ratio/low_mean": 0.00047021469345054356,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00118207704872475,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3752.0,
      "completions/mean_length": 1061.375,
      "completions/mean_terminated_length": 573.9481811523438,
      "completions/min_length": 180.0,
      "completions/min_terminated_length": 180.0,
      "epoch": 8.485131195335278,
      "grad_norm": 0.24835245311260223,
      "learning_rate": 1e-06,
      "loss": -0.0591,
      "num_tokens": 510175474.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.14920946955680847,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 908
    },
    {
      "clip_ratio/high_max": 0.0018362417395110242,
      "clip_ratio/high_mean": 0.0006620414878852898,
      "clip_ratio/low_mean": 0.0003299538598184881,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009919953627104405,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2415.0,
      "completions/mean_length": 860.3170166015625,
      "completions/mean_terminated_length": 516.7753295898438,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 8.494460641399417,
      "grad_norm": 0.22667130827903748,
      "learning_rate": 1e-06,
      "loss": -0.0244,
      "num_tokens": 510680118.0,
      "reward": 0.660714328289032,
      "reward_std": 0.14849267899990082,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 909
    },
    {
      "clip_ratio/high_max": 0.0022997462438070215,
      "clip_ratio/high_mean": 0.0007791659063514089,
      "clip_ratio/low_mean": 0.0003796070168391452,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011587728877202608,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2895.0,
      "completions/mean_length": 906.6897583007812,
      "completions/mean_terminated_length": 541.7437744140625,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 8.503790087463557,
      "grad_norm": 0.3134598731994629,
      "learning_rate": 1e-06,
      "loss": -0.0246,
      "num_tokens": 511215744.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.14091253280639648,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 910
    },
    {
      "clip_ratio/high_max": 0.0016868900565896183,
      "clip_ratio/high_mean": 0.0005501292662302149,
      "clip_ratio/low_mean": 0.00036749553237314103,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000917624767680536,
      "completions/clipped_ratio": 0.0580357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2369.0,
      "completions/mean_length": 702.9408569335938,
      "completions/mean_terminated_length": 493.88983154296875,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 8.513119533527696,
      "grad_norm": 0.241309255361557,
      "learning_rate": 1e-06,
      "loss": -0.0348,
      "num_tokens": 511721443.0,
      "reward": 0.7287946939468384,
      "reward_std": 0.12366396188735962,
      "rewards/verify_math_reward/mean": 0.7287946343421936,
      "rewards/verify_math_reward/std": 0.44483017921447754,
      "step": 911
    },
    {
      "clip_ratio/high_max": 0.0015970728927641176,
      "clip_ratio/high_mean": 0.0004294130103517091,
      "clip_ratio/low_mean": 0.0005405297551988042,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009699427773739444,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3722.0,
      "completions/mean_length": 985.3460083007812,
      "completions/mean_terminated_length": 567.9671020507812,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 8.522448979591836,
      "grad_norm": 0.1997060477733612,
      "learning_rate": 1e-06,
      "loss": -0.0433,
      "num_tokens": 512264337.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.11272881925106049,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 912
    },
    {
      "clip_ratio/high_max": 0.0018318583206564654,
      "clip_ratio/high_mean": 0.0007385027111013187,
      "clip_ratio/low_mean": 0.0005207276090004598,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012592303100973368,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3824.0,
      "completions/mean_length": 937.3973388671875,
      "completions/mean_terminated_length": 562.781494140625,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 8.531778425655977,
      "grad_norm": 0.24116960167884827,
      "learning_rate": 1e-06,
      "loss": -0.0569,
      "num_tokens": 512800413.0,
      "reward": 0.637276828289032,
      "reward_std": 0.15315966308116913,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 913
    },
    {
      "clip_ratio/high_max": 0.0018709901087277103,
      "clip_ratio/high_mean": 0.0006881550307298312,
      "clip_ratio/low_mean": 0.0003869075301281555,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010750625551736448,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2539.0,
      "completions/mean_length": 855.3984985351562,
      "completions/mean_terminated_length": 480.0859069824219,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 8.541107871720117,
      "grad_norm": 0.22416174411773682,
      "learning_rate": 1e-06,
      "loss": -0.0634,
      "num_tokens": 513280186.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.14774663746356964,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 914
    },
    {
      "clip_ratio/high_max": 0.0021866236202185974,
      "clip_ratio/high_mean": 0.0006154387301648967,
      "clip_ratio/low_mean": 0.000243381276504806,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008588199998484924,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1616.0,
      "completions/mean_length": 781.6763916015625,
      "completions/mean_terminated_length": 505.14874267578125,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 8.550437317784256,
      "grad_norm": 0.21724767982959747,
      "learning_rate": 1e-06,
      "loss": -0.0356,
      "num_tokens": 513786520.0,
      "reward": 0.7020089626312256,
      "reward_std": 0.11768428236246109,
      "rewards/verify_math_reward/mean": 0.7020089030265808,
      "rewards/verify_math_reward/std": 0.45763099193573,
      "step": 915
    },
    {
      "clip_ratio/high_max": 0.001643316831177799,
      "clip_ratio/high_mean": 0.0006447542600653833,
      "clip_ratio/low_mean": 0.0003459509134700056,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009907052044582088,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2622.0,
      "completions/mean_length": 1011.30810546875,
      "completions/mean_terminated_length": 601.835693359375,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 8.559766763848396,
      "grad_norm": 0.2337225377559662,
      "learning_rate": 1e-06,
      "loss": -0.0576,
      "num_tokens": 514361588.0,
      "reward": 0.543526828289032,
      "reward_std": 0.1481991559267044,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 916
    },
    {
      "clip_ratio/high_max": 0.0019296219725219999,
      "clip_ratio/high_mean": 0.0006793836455472047,
      "clip_ratio/low_mean": 0.0003670189353215392,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001046402583597228,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2867.0,
      "completions/mean_length": 949.7176513671875,
      "completions/mean_terminated_length": 532.069580078125,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 8.569096209912537,
      "grad_norm": 0.23076514899730682,
      "learning_rate": 1e-06,
      "loss": -0.0488,
      "num_tokens": 514870863.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.13429418206214905,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 917
    },
    {
      "clip_ratio/high_max": 0.0016397672079619952,
      "clip_ratio/high_mean": 0.0005472286702570273,
      "clip_ratio/low_mean": 0.000468710894438118,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010159395496884827,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2430.0,
      "completions/mean_length": 926.8660888671875,
      "completions/mean_terminated_length": 537.6741943359375,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 8.578425655976677,
      "grad_norm": 0.21286757290363312,
      "learning_rate": 1e-06,
      "loss": -0.0291,
      "num_tokens": 515393903.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.1350095570087433,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 918
    },
    {
      "clip_ratio/high_max": 0.0015756159627926536,
      "clip_ratio/high_mean": 0.000557405535801081,
      "clip_ratio/low_mean": 0.0005766614485764876,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011340670062054414,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2044.0,
      "completions/mean_length": 742.7142944335938,
      "completions/mean_terminated_length": 502.0478210449219,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 8.587755102040816,
      "grad_norm": 6717.05322265625,
      "learning_rate": 1e-06,
      "loss": 0.2463,
      "num_tokens": 515909135.0,
      "reward": 0.7087053656578064,
      "reward_std": 0.15733122825622559,
      "rewards/verify_math_reward/mean": 0.7087053656578064,
      "rewards/verify_math_reward/std": 0.45461276173591614,
      "step": 919
    },
    {
      "clip_ratio/high_max": 0.002001937074965099,
      "clip_ratio/high_mean": 0.0007161392595662619,
      "clip_ratio/low_mean": 0.000455452853202587,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011715921100403648,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3228.0,
      "completions/mean_length": 972.87060546875,
      "completions/mean_terminated_length": 571.6624755859375,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 8.597084548104956,
      "grad_norm": 0.21933098137378693,
      "learning_rate": 1e-06,
      "loss": -0.0416,
      "num_tokens": 516445763.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.16037102043628693,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 920
    },
    {
      "clip_ratio/high_max": 0.0021662593208020553,
      "clip_ratio/high_mean": 0.0007581191330245929,
      "clip_ratio/low_mean": 0.0004433449021234992,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012014640124107245,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3997.0,
      "completions/mean_length": 962.029052734375,
      "completions/mean_terminated_length": 509.74456787109375,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 8.606413994169095,
      "grad_norm": 0.23349839448928833,
      "learning_rate": 1e-06,
      "loss": -0.0566,
      "num_tokens": 516927573.0,
      "reward": 0.668526828289032,
      "reward_std": 0.1288815438747406,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056483745575,
      "step": 921
    },
    {
      "clip_ratio/high_max": 0.001967736450751545,
      "clip_ratio/high_mean": 0.0006042885925126029,
      "clip_ratio/low_mean": 0.0004970809841324808,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011013695402652957,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2446.0,
      "completions/mean_length": 787.2064819335938,
      "completions/mean_terminated_length": 566.6202392578125,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 8.615743440233237,
      "grad_norm": 0.26429736614227295,
      "learning_rate": 1e-06,
      "loss": -0.033,
      "num_tokens": 517487646.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.14624707400798798,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 922
    },
    {
      "clip_ratio/high_max": 0.0020967237251170445,
      "clip_ratio/high_mean": 0.0007870002773415763,
      "clip_ratio/low_mean": 0.0003176267155140522,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011046270083170384,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3753.0,
      "completions/mean_length": 974.7824096679688,
      "completions/mean_terminated_length": 528.8941040039062,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 8.625072886297376,
      "grad_norm": 0.23378008604049683,
      "learning_rate": 1e-06,
      "loss": -0.0492,
      "num_tokens": 517998483.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.13219164311885834,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 923
    },
    {
      "clip_ratio/high_max": 0.0022776820114813745,
      "clip_ratio/high_mean": 0.0009351541120850015,
      "clip_ratio/low_mean": 0.0004110058025617036,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013461599010042846,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2214.0,
      "completions/mean_length": 871.1205444335938,
      "completions/mean_terminated_length": 497.6288757324219,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 8.634402332361516,
      "grad_norm": 0.2799464166164398,
      "learning_rate": 1e-06,
      "loss": -0.0368,
      "num_tokens": 518490399.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.15030533075332642,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 924
    },
    {
      "clip_ratio/high_max": 0.001753624834236689,
      "clip_ratio/high_mean": 0.0005756567447861016,
      "clip_ratio/low_mean": 0.0005050633108112379,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010807200560520869,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3961.0,
      "completions/mean_length": 943.169677734375,
      "completions/mean_terminated_length": 542.6213989257812,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 8.643731778425655,
      "grad_norm": 0.31731176376342773,
      "learning_rate": 1e-06,
      "loss": -0.0348,
      "num_tokens": 519013759.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.14522789418697357,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 925
    },
    {
      "clip_ratio/high_max": 0.0019128205167362466,
      "clip_ratio/high_mean": 0.0006273483968470828,
      "clip_ratio/low_mean": 0.000485394918541715,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011127433135698084,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2648.0,
      "completions/mean_length": 910.247802734375,
      "completions/mean_terminated_length": 523.4918823242188,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 8.653061224489797,
      "grad_norm": 0.2529394030570984,
      "learning_rate": 1e-06,
      "loss": -0.0441,
      "num_tokens": 519519909.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.15259189903736115,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 926
    },
    {
      "clip_ratio/high_max": 0.001869158870249521,
      "clip_ratio/high_mean": 0.0006110826516305679,
      "clip_ratio/low_mean": 0.0002701722187339328,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008812548621790484,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2848.0,
      "completions/mean_length": 936.6428833007812,
      "completions/mean_terminated_length": 508.18756103515625,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 8.662390670553936,
      "grad_norm": 0.20568110048770905,
      "learning_rate": 1e-06,
      "loss": -0.0268,
      "num_tokens": 520012085.0,
      "reward": 0.621651828289032,
      "reward_std": 0.11314070224761963,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 927
    },
    {
      "clip_ratio/high_max": 0.0019109445747744758,
      "clip_ratio/high_mean": 0.0006054648492863635,
      "clip_ratio/low_mean": 0.0004211364412185503,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010266013396176277,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3126.0,
      "completions/mean_length": 1008.6328735351562,
      "completions/mean_terminated_length": 535.7927856445312,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 8.671720116618076,
      "grad_norm": 0.25297918915748596,
      "learning_rate": 1e-06,
      "loss": -0.0326,
      "num_tokens": 520517500.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.13929352164268494,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161848425865173,
      "step": 928
    },
    {
      "clip_ratio/high_max": 0.0017755872031557374,
      "clip_ratio/high_mean": 0.000656154577882262,
      "clip_ratio/low_mean": 0.0004934472926834133,
      "clip_ratio/low_min": 1.6850903193699196e-05,
      "clip_ratio/region_mean": 0.0011496018632897176,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3596.0,
      "completions/mean_length": 926.3795166015625,
      "completions/mean_terminated_length": 541.5819702148438,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 8.681049562682215,
      "grad_norm": 0.28964167833328247,
      "learning_rate": 1e-06,
      "loss": -0.0405,
      "num_tokens": 521038920.0,
      "reward": 0.6328125,
      "reward_std": 0.16314727067947388,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 929
    },
    {
      "clip_ratio/high_max": 0.002014971847529523,
      "clip_ratio/high_mean": 0.0006176638726174133,
      "clip_ratio/low_mean": 0.00044335930124361767,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010610231838654727,
      "completions/clipped_ratio": 0.1372767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2858.0,
      "completions/mean_length": 995.2109985351562,
      "completions/mean_terminated_length": 501.81243896484375,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 8.690379008746355,
      "grad_norm": 0.24830204248428345,
      "learning_rate": 1e-06,
      "loss": -0.0385,
      "num_tokens": 521513133.0,
      "reward": 0.684151828289032,
      "reward_std": 0.11930189281702042,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 930
    },
    {
      "clip_ratio/high_max": 0.0019216491382394452,
      "clip_ratio/high_mean": 0.0007386029656117898,
      "clip_ratio/low_mean": 0.0005430868377516163,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012816898488381412,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4077.0,
      "completions/mean_length": 1096.6685791015625,
      "completions/mean_terminated_length": 605.8688354492188,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 8.699708454810496,
      "grad_norm": 0.2941707968711853,
      "learning_rate": 1e-06,
      "loss": -0.0534,
      "num_tokens": 522073092.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.16905026137828827,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.491192102432251,
      "step": 931
    },
    {
      "clip_ratio/high_max": 0.001703739591903286,
      "clip_ratio/high_mean": 0.000559276007606968,
      "clip_ratio/low_mean": 0.0004476770636756555,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010069530726468656,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3916.0,
      "completions/mean_length": 907.87841796875,
      "completions/mean_terminated_length": 551.8845825195312,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 8.709037900874636,
      "grad_norm": 0.23361043632030487,
      "learning_rate": 1e-06,
      "loss": -0.0452,
      "num_tokens": 522611519.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.1391758769750595,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.4884119927883148,
      "step": 932
    },
    {
      "clip_ratio/high_max": 0.0016184208434424363,
      "clip_ratio/high_mean": 0.0005095614214951638,
      "clip_ratio/low_mean": 0.0004677145607274724,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009772759967745515,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3048.0,
      "completions/mean_length": 1032.6529541015625,
      "completions/mean_terminated_length": 568.0321044921875,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 8.718367346938775,
      "grad_norm": 0.29861971735954285,
      "learning_rate": 1e-06,
      "loss": -0.0439,
      "num_tokens": 523142136.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.12133137136697769,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 933
    },
    {
      "clip_ratio/high_max": 0.0016488937071699183,
      "clip_ratio/high_mean": 0.00060257361838012,
      "clip_ratio/low_mean": 0.0005295234968798468,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011320971370878397,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2773.0,
      "completions/mean_length": 1023.216552734375,
      "completions/mean_terminated_length": 561.7073364257812,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 8.727696793002915,
      "grad_norm": 0.29928725957870483,
      "learning_rate": 1e-06,
      "loss": -0.0473,
      "num_tokens": 523665874.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.13042065501213074,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 934
    },
    {
      "clip_ratio/high_max": 0.002083321516693104,
      "clip_ratio/high_mean": 0.0008094834902294679,
      "clip_ratio/low_mean": 0.0005263916391413659,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013358751093619503,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2732.0,
      "completions/mean_length": 977.9129638671875,
      "completions/mean_terminated_length": 537.011474609375,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 8.737026239067056,
      "grad_norm": 0.2581687569618225,
      "learning_rate": 1e-06,
      "loss": -0.0427,
      "num_tokens": 524166892.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.1559034138917923,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 935
    },
    {
      "clip_ratio/high_max": 0.0020345321245258674,
      "clip_ratio/high_mean": 0.0008072499858826632,
      "clip_ratio/low_mean": 0.0005050225477134518,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013122725431458093,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3878.0,
      "completions/mean_length": 855.7857666015625,
      "completions/mean_terminated_length": 516.1824951171875,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 8.746355685131196,
      "grad_norm": 0.6368719339370728,
      "learning_rate": 1e-06,
      "loss": -0.0442,
      "num_tokens": 524673628.0,
      "reward": 0.6171875,
      "reward_std": 0.15942852199077606,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 936
    },
    {
      "clip_ratio/high_max": 0.0015030265221867012,
      "clip_ratio/high_mean": 0.0005101862302581139,
      "clip_ratio/low_mean": 0.00029400829180303845,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008041945093282266,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3443.0,
      "completions/mean_length": 864.7701416015625,
      "completions/mean_terminated_length": 526.1085205078125,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 8.755685131195335,
      "grad_norm": 0.17452889680862427,
      "learning_rate": 1e-06,
      "loss": -0.0331,
      "num_tokens": 525193518.0,
      "reward": 0.7020089626312256,
      "reward_std": 0.10333602875471115,
      "rewards/verify_math_reward/mean": 0.7020089030265808,
      "rewards/verify_math_reward/std": 0.45763099193573,
      "step": 937
    },
    {
      "clip_ratio/high_max": 0.0017282160697504878,
      "clip_ratio/high_mean": 0.0007196230753834243,
      "clip_ratio/low_mean": 0.000523139514370996,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012427625842974521,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2514.0,
      "completions/mean_length": 1067.56591796875,
      "completions/mean_terminated_length": 572.00390625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 8.765014577259475,
      "grad_norm": 0.2921406924724579,
      "learning_rate": 1e-06,
      "loss": -0.0736,
      "num_tokens": 525720633.0,
      "reward": 0.613839328289032,
      "reward_std": 0.16927596926689148,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 938
    },
    {
      "clip_ratio/high_max": 0.002149793963326374,
      "clip_ratio/high_mean": 0.0007308811664188397,
      "clip_ratio/low_mean": 0.00017927868805145408,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009101598698180169,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1987.0,
      "completions/mean_length": 713.154052734375,
      "completions/mean_terminated_length": 466.0239562988281,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 8.774344023323614,
      "grad_norm": 0.25510385632514954,
      "learning_rate": 1e-06,
      "loss": -0.0557,
      "num_tokens": 526193739.0,
      "reward": 0.738839328289032,
      "reward_std": 0.11866319924592972,
      "rewards/verify_math_reward/mean": 0.7388392686843872,
      "rewards/verify_math_reward/std": 0.439512699842453,
      "step": 939
    },
    {
      "clip_ratio/high_max": 0.002407035426585935,
      "clip_ratio/high_mean": 0.00088339581816399,
      "clip_ratio/low_mean": 0.0004453325391295948,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001328728350927122,
      "completions/clipped_ratio": 0.1450892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3440.0,
      "completions/mean_length": 1051.930908203125,
      "completions/mean_terminated_length": 535.3133544921875,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 8.783673469387756,
      "grad_norm": 0.2580684721469879,
      "learning_rate": 1e-06,
      "loss": -0.0513,
      "num_tokens": 526697469.0,
      "reward": 0.625,
      "reward_std": 0.15030603110790253,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 940
    },
    {
      "clip_ratio/high_max": 0.0016847862389113288,
      "clip_ratio/high_mean": 0.0005231068853390752,
      "clip_ratio/low_mean": 0.00046479639240715187,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009879032804747112,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3600.0,
      "completions/mean_length": 839.4877319335938,
      "completions/mean_terminated_length": 507.02703857421875,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 8.793002915451895,
      "grad_norm": 0.275890052318573,
      "learning_rate": 1e-06,
      "loss": -0.0487,
      "num_tokens": 527188746.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.14004167914390564,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 941
    },
    {
      "clip_ratio/high_max": 0.002201137878728332,
      "clip_ratio/high_mean": 0.0007997457887540804,
      "clip_ratio/low_mean": 0.00037658151813957375,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011763273359974846,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3335.0,
      "completions/mean_length": 924.200927734375,
      "completions/mean_terminated_length": 512.2269897460938,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 8.802332361516035,
      "grad_norm": 0.2549758851528168,
      "learning_rate": 1e-06,
      "loss": -0.0494,
      "num_tokens": 527680038.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.13260029256343842,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975656390190125,
      "step": 942
    },
    {
      "clip_ratio/high_max": 0.0018307962527615018,
      "clip_ratio/high_mean": 0.0006916166012160829,
      "clip_ratio/low_mean": 0.0003792502388932917,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010708668378356379,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3613.0,
      "completions/mean_length": 781.2344360351562,
      "completions/mean_terminated_length": 530.5377807617188,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 8.811661807580174,
      "grad_norm": 0.21261122822761536,
      "learning_rate": 1e-06,
      "loss": -0.0325,
      "num_tokens": 528212112.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.126027911901474,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 943
    },
    {
      "clip_ratio/high_max": 0.0016578540744376369,
      "clip_ratio/high_mean": 0.0006712314716423862,
      "clip_ratio/low_mean": 0.0002848696722139721,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009561011211189907,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3646.0,
      "completions/mean_length": 962.4420166015625,
      "completions/mean_terminated_length": 564.3421020507812,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 8.820991253644316,
      "grad_norm": 0.2269987165927887,
      "learning_rate": 1e-06,
      "loss": -0.0505,
      "num_tokens": 528746356.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.14681048691272736,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 944
    },
    {
      "clip_ratio/high_max": 0.002148511994164437,
      "clip_ratio/high_mean": 0.0007235466182464734,
      "clip_ratio/low_mean": 0.0003367772960700677,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010603239170450252,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2865.0,
      "completions/mean_length": 890.5558471679688,
      "completions/mean_terminated_length": 528.2012329101562,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 8.830320699708455,
      "grad_norm": 0.23371781408786774,
      "learning_rate": 1e-06,
      "loss": -0.0521,
      "num_tokens": 529255430.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.13041996955871582,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900800228119,
      "step": 945
    },
    {
      "clip_ratio/high_max": 0.0021029542622272857,
      "clip_ratio/high_mean": 0.000874880613082496,
      "clip_ratio/low_mean": 0.0004403387274578563,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013152193387213629,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3647.0,
      "completions/mean_length": 892.3114013671875,
      "completions/mean_terminated_length": 516.8167114257812,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 8.839650145772595,
      "grad_norm": 0.2372330278158188,
      "learning_rate": 1e-06,
      "loss": -0.082,
      "num_tokens": 529753661.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.1595045030117035,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 946
    },
    {
      "clip_ratio/high_max": 0.001694159542239504,
      "clip_ratio/high_mean": 0.0005554514391405974,
      "clip_ratio/low_mean": 0.0002972530833176279,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008527045392838772,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3460.0,
      "completions/mean_length": 883.1272583007812,
      "completions/mean_terminated_length": 528.7955322265625,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 8.848979591836734,
      "grad_norm": 0.20902705192565918,
      "learning_rate": 1e-06,
      "loss": -0.0318,
      "num_tokens": 530262919.0,
      "reward": 0.640625,
      "reward_std": 0.11396483331918716,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 947
    },
    {
      "clip_ratio/high_max": 0.0019944397208746523,
      "clip_ratio/high_mean": 0.0007265213989740005,
      "clip_ratio/low_mean": 0.0003158008330501616,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010423222338431515,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3685.0,
      "completions/mean_length": 911.2109985351562,
      "completions/mean_terminated_length": 590.384521484375,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 8.858309037900874,
      "grad_norm": 0.22213862836360931,
      "learning_rate": 1e-06,
      "loss": -0.0397,
      "num_tokens": 530833612.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.13639894127845764,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 948
    },
    {
      "clip_ratio/high_max": 0.0017291305521212053,
      "clip_ratio/high_mean": 0.0006730979021085659,
      "clip_ratio/low_mean": 0.00032835083197824133,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001001448712486308,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2863.0,
      "completions/mean_length": 921.8995971679688,
      "completions/mean_terminated_length": 532.0977172851562,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 8.867638483965015,
      "grad_norm": 0.21696864068508148,
      "learning_rate": 1e-06,
      "loss": -0.0542,
      "num_tokens": 531344994.0,
      "reward": 0.6328125,
      "reward_std": 0.15138868987560272,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 949
    },
    {
      "clip_ratio/high_max": 0.001585699350471259,
      "clip_ratio/high_mean": 0.0006043091252649901,
      "clip_ratio/low_mean": 0.00048170495847443817,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010860140719159972,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2396.0,
      "completions/mean_length": 1003.1495971679688,
      "completions/mean_terminated_length": 534.053955078125,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 8.876967930029155,
      "grad_norm": 0.22879809141159058,
      "learning_rate": 1e-06,
      "loss": -0.0577,
      "num_tokens": 531850544.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.13685175776481628,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199838399887085,
      "step": 950
    },
    {
      "clip_ratio/high_max": 0.002322256677871337,
      "clip_ratio/high_mean": 0.0008632607587060193,
      "clip_ratio/low_mean": 0.00039092526469630684,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012541860232886393,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2684.0,
      "completions/mean_length": 930.66748046875,
      "completions/mean_terminated_length": 541.9423828125,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 8.886297376093294,
      "grad_norm": 0.3646446466445923,
      "learning_rate": 1e-06,
      "loss": -0.0454,
      "num_tokens": 532377054.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.14027062058448792,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 951
    },
    {
      "clip_ratio/high_max": 0.0020843351157964207,
      "clip_ratio/high_mean": 0.0008173993301170412,
      "clip_ratio/low_mean": 0.0003183621884090826,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001135761533078039,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3512.0,
      "completions/mean_length": 880.3582763671875,
      "completions/mean_terminated_length": 552.0701293945312,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 8.895626822157434,
      "grad_norm": 0.25953951478004456,
      "learning_rate": 1e-06,
      "loss": -0.0388,
      "num_tokens": 532915935.0,
      "reward": 0.676339328289032,
      "reward_std": 0.14417551457881927,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335687637329,
      "step": 952
    },
    {
      "clip_ratio/high_max": 0.0023956390250532422,
      "clip_ratio/high_mean": 0.0009061027431016555,
      "clip_ratio/low_mean": 0.0004230161107443564,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013291188261064235,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3827.0,
      "completions/mean_length": 931.810302734375,
      "completions/mean_terminated_length": 529.81884765625,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 8.904956268221575,
      "grad_norm": 0.2815774381160736,
      "learning_rate": 1e-06,
      "loss": -0.0431,
      "num_tokens": 533428317.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.17223837971687317,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975659370422363,
      "step": 953
    },
    {
      "clip_ratio/high_max": 0.0023849601711845025,
      "clip_ratio/high_mean": 0.0007930019182822434,
      "clip_ratio/low_mean": 0.00044139458168501733,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001234396520885639,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2789.0,
      "completions/mean_length": 759.6663208007812,
      "completions/mean_terminated_length": 511.6415100097656,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 8.914285714285715,
      "grad_norm": 0.9295297265052795,
      "learning_rate": 1e-06,
      "loss": -0.0395,
      "num_tokens": 533946258.0,
      "reward": 0.684151828289032,
      "reward_std": 0.12561675906181335,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 954
    },
    {
      "clip_ratio/high_max": 0.002032962493103696,
      "clip_ratio/high_mean": 0.000756587736759684,
      "clip_ratio/low_mean": 0.0005395033485910972,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001296091046242509,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3923.0,
      "completions/mean_length": 868.7332763671875,
      "completions/mean_terminated_length": 547.9865112304688,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 8.923615160349854,
      "grad_norm": 0.3900900185108185,
      "learning_rate": 1e-06,
      "loss": -0.0334,
      "num_tokens": 534476587.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.15353691577911377,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 955
    },
    {
      "clip_ratio/high_max": 0.001982136134756729,
      "clip_ratio/high_mean": 0.0006743329922755947,
      "clip_ratio/low_mean": 0.0004361085129858111,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011104415207228158,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3267.0,
      "completions/mean_length": 947.0156860351562,
      "completions/mean_terminated_length": 546.9559936523438,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 8.932944606413994,
      "grad_norm": 0.2565886974334717,
      "learning_rate": 1e-06,
      "loss": -0.0439,
      "num_tokens": 535001649.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.12456297874450684,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 956
    },
    {
      "clip_ratio/high_max": 0.0018949311051983386,
      "clip_ratio/high_mean": 0.0006435758623410948,
      "clip_ratio/low_mean": 0.00041794130288508313,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010615171722747618,
      "completions/clipped_ratio": 0.1696428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3170.0,
      "completions/mean_length": 1151.618408203125,
      "completions/mean_terminated_length": 550.0779418945312,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 8.942274052478133,
      "grad_norm": 0.26293376088142395,
      "learning_rate": 1e-06,
      "loss": -0.0784,
      "num_tokens": 535502587.0,
      "reward": 0.543526828289032,
      "reward_std": 0.14815960824489594,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 957
    },
    {
      "clip_ratio/high_max": 0.0017872969001473393,
      "clip_ratio/high_mean": 0.0005414857291725639,
      "clip_ratio/low_mean": 0.000530541659827577,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010720273967308458,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2626.0,
      "completions/mean_length": 986.763427734375,
      "completions/mean_terminated_length": 591.7534790039062,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 8.951603498542275,
      "grad_norm": 0.8553789258003235,
      "learning_rate": 1e-06,
      "loss": -0.0298,
      "num_tokens": 536068279.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.13914379477500916,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 958
    },
    {
      "clip_ratio/high_max": 0.0019308765076857526,
      "clip_ratio/high_mean": 0.0006775250167265767,
      "clip_ratio/low_mean": 0.0005201590511205723,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011976840578427073,
      "completions/clipped_ratio": 0.1428571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3868.0,
      "completions/mean_length": 1032.8671875,
      "completions/mean_terminated_length": 522.3450927734375,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 8.960932944606414,
      "grad_norm": 0.24272368848323822,
      "learning_rate": 1e-06,
      "loss": -0.0493,
      "num_tokens": 536560648.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.13568215072155,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 959
    },
    {
      "clip_ratio/high_max": 0.0023686804706812836,
      "clip_ratio/high_mean": 0.0007480191306967754,
      "clip_ratio/low_mean": 0.0004042846069296502,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011523037137521897,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3794.0,
      "completions/mean_length": 1030.0546875,
      "completions/mean_terminated_length": 574.0936279296875,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 8.970262390670554,
      "grad_norm": 0.8244277834892273,
      "learning_rate": 1e-06,
      "loss": -0.0492,
      "num_tokens": 537105161.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.13760243356227875,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 960
    },
    {
      "clip_ratio/high_max": 0.0020416714178281836,
      "clip_ratio/high_mean": 0.0008604426293459255,
      "clip_ratio/low_mean": 0.0004435464015841717,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013039890363870654,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3019.0,
      "completions/mean_length": 832.083740234375,
      "completions/mean_terminated_length": 529.5744018554688,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 8.979591836734693,
      "grad_norm": 0.27510568499565125,
      "learning_rate": 1e-06,
      "loss": -0.0473,
      "num_tokens": 537627972.0,
      "reward": 0.6930803656578064,
      "reward_std": 0.18118606507778168,
      "rewards/verify_math_reward/mean": 0.6930803656578064,
      "rewards/verify_math_reward/std": 0.46147337555885315,
      "step": 961
    },
    {
      "clip_ratio/high_max": 0.0020786139648407698,
      "clip_ratio/high_mean": 0.0008178072894224897,
      "clip_ratio/low_mean": 0.0006200714051374234,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001437878621800337,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3749.0,
      "completions/mean_length": 892.8850708007812,
      "completions/mean_terminated_length": 548.4215087890625,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 8.988921282798835,
      "grad_norm": 0.32806262373924255,
      "learning_rate": 1e-06,
      "loss": -0.0493,
      "num_tokens": 538168917.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.1789316087961197,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975656390190125,
      "step": 962
    },
    {
      "clip_ratio/high_max": 0.0019751785475818906,
      "clip_ratio/high_mean": 0.0007488408955396153,
      "clip_ratio/low_mean": 0.0004993245383957401,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012481654557632282,
      "completions/clipped_ratio": 0.09943181818181823,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1380.0,
      "completions/mean_length": 922.9005737304688,
      "completions/mean_terminated_length": 572.558349609375,
      "completions/min_length": 179.0,
      "completions/min_terminated_length": 179.0,
      "epoch": 8.998250728862974,
      "grad_norm": 0.25778788328170776,
      "learning_rate": 1e-06,
      "loss": -0.0481,
      "num_tokens": 538719050.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.1367429792881012,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 963
    },
    {
      "clip_ratio/high_max": 0.0020093115599593148,
      "clip_ratio/high_mean": 0.0008440847905148985,
      "clip_ratio/low_mean": 0.00047851414274191484,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001322598931437824,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3851.0,
      "completions/mean_length": 919.58935546875,
      "completions/mean_terminated_length": 599.6068725585938,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 9.00932944606414,
      "grad_norm": 0.203589528799057,
      "learning_rate": 1e-06,
      "loss": -0.0416,
      "num_tokens": 539299338.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.168679341673851,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179922461509705,
      "step": 964
    },
    {
      "clip_ratio/high_max": 0.0018228169428766705,
      "clip_ratio/high_mean": 0.0006125026775407605,
      "clip_ratio/low_mean": 0.00035421171560301445,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009667143822298385,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3509.0,
      "completions/mean_length": 863.7723388671875,
      "completions/mean_terminated_length": 493.9154052734375,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 9.018658892128279,
      "grad_norm": 0.23763151466846466,
      "learning_rate": 1e-06,
      "loss": -0.0405,
      "num_tokens": 539782430.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.1356828510761261,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.462861567735672,
      "step": 965
    },
    {
      "clip_ratio/high_max": 0.0022057854148442857,
      "clip_ratio/high_mean": 0.0008197773859137669,
      "clip_ratio/low_mean": 0.0005298856394801987,
      "clip_ratio/low_min": 1.5424482626258396e-05,
      "clip_ratio/region_mean": 0.0013496630308509339,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4044.0,
      "completions/mean_length": 811.2801513671875,
      "completions/mean_terminated_length": 524.2658081054688,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 9.02798833819242,
      "grad_norm": 0.2678503096103668,
      "learning_rate": 1e-06,
      "loss": -0.0523,
      "num_tokens": 540311097.0,
      "reward": 0.6640625,
      "reward_std": 0.16074052453041077,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 966
    },
    {
      "clip_ratio/high_max": 0.0021756058376922738,
      "clip_ratio/high_mean": 0.0007414086248900276,
      "clip_ratio/low_mean": 0.0005067757974757114,
      "clip_ratio/low_min": 1.4751002709090244e-05,
      "clip_ratio/region_mean": 0.0012481844096328132,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1880.0,
      "completions/mean_length": 967.0558471679688,
      "completions/mean_terminated_length": 520.0637817382812,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 9.03731778425656,
      "grad_norm": 2.595151424407959,
      "learning_rate": 1e-06,
      "loss": -0.0549,
      "num_tokens": 540809227.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.14368806779384613,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199838399887085,
      "step": 967
    },
    {
      "clip_ratio/high_max": 0.0016082822221505921,
      "clip_ratio/high_mean": 0.0006099893516875454,
      "clip_ratio/low_mean": 0.00024320421096035716,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008531935691280523,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3626.0,
      "completions/mean_length": 966.1942138671875,
      "completions/mean_terminated_length": 573.0025024414062,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 9.0466472303207,
      "grad_norm": 0.1958475559949875,
      "learning_rate": 1e-06,
      "loss": -0.0471,
      "num_tokens": 541371185.0,
      "reward": 0.609375,
      "reward_std": 0.12587566673755646,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 968
    },
    {
      "clip_ratio/high_max": 0.0021028903356636874,
      "clip_ratio/high_mean": 0.0007086892273946432,
      "clip_ratio/low_mean": 0.0004200470975774806,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011287362885923358,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3582.0,
      "completions/mean_length": 828.036865234375,
      "completions/mean_terminated_length": 538.1689453125,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 9.055976676384839,
      "grad_norm": 0.20024479925632477,
      "learning_rate": 1e-06,
      "loss": -0.0505,
      "num_tokens": 541898738.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.12092021852731705,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.462861567735672,
      "step": 969
    },
    {
      "clip_ratio/high_max": 0.002064900614641374,
      "clip_ratio/high_mean": 0.0007238188782139332,
      "clip_ratio/low_mean": 0.00025628001299082825,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009800989018913242,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2521.0,
      "completions/mean_length": 778.5904541015625,
      "completions/mean_terminated_length": 488.71966552734375,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 9.06530612244898,
      "grad_norm": 0.26544249057769775,
      "learning_rate": 1e-06,
      "loss": -0.0321,
      "num_tokens": 542385971.0,
      "reward": 0.7064732313156128,
      "reward_std": 0.12343642115592957,
      "rewards/verify_math_reward/mean": 0.7064732313156128,
      "rewards/verify_math_reward/std": 0.4556320011615753,
      "step": 970
    },
    {
      "clip_ratio/high_max": 0.0017448019916628255,
      "clip_ratio/high_mean": 0.0006562436042258923,
      "clip_ratio/low_mean": 0.0005531991464522434,
      "clip_ratio/low_min": 1.3001872503082268e-05,
      "clip_ratio/region_mean": 0.0012094427802367136,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3242.0,
      "completions/mean_length": 965.7210083007812,
      "completions/mean_terminated_length": 568.0377197265625,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 9.07463556851312,
      "grad_norm": 0.21854481101036072,
      "learning_rate": 1e-06,
      "loss": -0.0344,
      "num_tokens": 542929969.0,
      "reward": 0.621651828289032,
      "reward_std": 0.14556489884853363,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.485245943069458,
      "step": 971
    },
    {
      "clip_ratio/high_max": 0.0016820623168314341,
      "clip_ratio/high_mean": 0.00045042975625619874,
      "clip_ratio/low_mean": 0.0004390708999153503,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008895006576494779,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3163.0,
      "completions/mean_length": 796.8638916015625,
      "completions/mean_terminated_length": 521.6033935546875,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 9.08396501457726,
      "grad_norm": 0.3739985525608063,
      "learning_rate": 1e-06,
      "loss": -0.0221,
      "num_tokens": 543444255.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.12035498023033142,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.4600566029548645,
      "step": 972
    },
    {
      "clip_ratio/high_max": 0.0017770100057532545,
      "clip_ratio/high_mean": 0.0006339427545754006,
      "clip_ratio/low_mean": 0.00034976727511093486,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000983710040600272,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2004.0,
      "completions/mean_length": 835.1116333007812,
      "completions/mean_terminated_length": 554.4776000976562,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 9.093294460641399,
      "grad_norm": 0.202556312084198,
      "learning_rate": 1e-06,
      "loss": -0.0398,
      "num_tokens": 543989723.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.1393256038427353,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 973
    },
    {
      "clip_ratio/high_max": 0.0018257257870573085,
      "clip_ratio/high_mean": 0.0004840030205741641,
      "clip_ratio/low_mean": 0.0004987927691217919,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009827957801462617,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3883.0,
      "completions/mean_length": 1002.9141235351562,
      "completions/mean_terminated_length": 592.3274536132812,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 9.102623906705539,
      "grad_norm": 0.22389450669288635,
      "learning_rate": 1e-06,
      "loss": -0.0374,
      "num_tokens": 544538366.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.12933549284934998,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161845445632935,
      "step": 974
    },
    {
      "clip_ratio/high_max": 0.002657514691236429,
      "clip_ratio/high_mean": 0.0009615100443625124,
      "clip_ratio/low_mean": 0.0006056770866962324,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0015671870933147147,
      "completions/clipped_ratio": 0.1517857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3349.0,
      "completions/mean_length": 1071.8226318359375,
      "completions/mean_terminated_length": 530.6539916992188,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 9.11195335276968,
      "grad_norm": 0.4175319969654083,
      "learning_rate": 1e-06,
      "loss": -0.0895,
      "num_tokens": 545047175.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.16506798565387726,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321895837783813,
      "step": 975
    },
    {
      "clip_ratio/high_max": 0.0018570687279861886,
      "clip_ratio/high_mean": 0.0006237320376385469,
      "clip_ratio/low_mean": 0.0005383524844546628,
      "clip_ratio/low_min": 1.752172647684347e-05,
      "clip_ratio/region_mean": 0.0011620845216384623,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2910.0,
      "completions/mean_length": 983.0223388671875,
      "completions/mean_terminated_length": 515.4762573242188,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 9.12128279883382,
      "grad_norm": 0.2701054811477661,
      "learning_rate": 1e-06,
      "loss": -0.0562,
      "num_tokens": 545540907.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.1429734081029892,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 976
    },
    {
      "clip_ratio/high_max": 0.00210203471215209,
      "clip_ratio/high_mean": 0.0008136369706335245,
      "clip_ratio/low_mean": 0.00039900924821267836,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012126462279411498,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3797.0,
      "completions/mean_length": 1006.4230346679688,
      "completions/mean_terminated_length": 587.430908203125,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 9.130612244897959,
      "grad_norm": 0.2305305004119873,
      "learning_rate": 1e-06,
      "loss": -0.0651,
      "num_tokens": 546097342.0,
      "reward": 0.629464328289032,
      "reward_std": 0.16825930774211884,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 977
    },
    {
      "clip_ratio/high_max": 0.002026208430834231,
      "clip_ratio/high_mean": 0.0006495902581491464,
      "clip_ratio/low_mean": 0.00031128956925385864,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009608798245608341,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3378.0,
      "completions/mean_length": 1021.5558471679688,
      "completions/mean_terminated_length": 518.4649047851562,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 9.139941690962099,
      "grad_norm": 0.2514380216598511,
      "learning_rate": 1e-06,
      "loss": -0.0529,
      "num_tokens": 546585328.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.12674400210380554,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 978
    },
    {
      "clip_ratio/high_max": 0.0018954524566652253,
      "clip_ratio/high_mean": 0.0006922929460415617,
      "clip_ratio/low_mean": 0.00033622576484049205,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010285187290719477,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2062.0,
      "completions/mean_length": 907.7489013671875,
      "completions/mean_terminated_length": 538.4993896484375,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 9.14927113702624,
      "grad_norm": 0.21578700840473175,
      "learning_rate": 1e-06,
      "loss": -0.049,
      "num_tokens": 547112535.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.13467325270175934,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 979
    },
    {
      "clip_ratio/high_max": 0.0021745014855696354,
      "clip_ratio/high_mean": 0.0008442952084806166,
      "clip_ratio/low_mean": 0.0003191076943949156,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011634029251581524,
      "completions/clipped_ratio": 0.1462053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3657.0,
      "completions/mean_length": 1070.578125,
      "completions/mean_terminated_length": 552.4993896484375,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 9.15860058309038,
      "grad_norm": 0.23006924986839294,
      "learning_rate": 1e-06,
      "loss": -0.0701,
      "num_tokens": 547626397.0,
      "reward": 0.637276828289032,
      "reward_std": 0.16262802481651306,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 980
    },
    {
      "clip_ratio/high_max": 0.0018160391045967117,
      "clip_ratio/high_mean": 0.0006584514458154445,
      "clip_ratio/low_mean": 0.00042727838717837585,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001085729827536852,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2652.0,
      "completions/mean_length": 803.1886596679688,
      "completions/mean_terminated_length": 519.8072509765625,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 9.167930029154519,
      "grad_norm": 0.2168523222208023,
      "learning_rate": 1e-06,
      "loss": -0.0394,
      "num_tokens": 548152662.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.1546546071767807,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909069061279297,
      "step": 981
    },
    {
      "clip_ratio/high_max": 0.0017473156367486808,
      "clip_ratio/high_mean": 0.0006193101580720395,
      "clip_ratio/low_mean": 0.0003872137795042363,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010065239112009294,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2810.0,
      "completions/mean_length": 834.4955444335938,
      "completions/mean_terminated_length": 505.9410095214844,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 9.177259475218658,
      "grad_norm": 0.6085642576217651,
      "learning_rate": 1e-06,
      "loss": -0.0426,
      "num_tokens": 548643954.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.14199379086494446,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 982
    },
    {
      "clip_ratio/high_max": 0.0018346663418924436,
      "clip_ratio/high_mean": 0.0006980431280680932,
      "clip_ratio/low_mean": 0.00047877328279355424,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011768164113163948,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3568.0,
      "completions/mean_length": 862.716552734375,
      "completions/mean_terminated_length": 515.0086669921875,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 9.186588921282798,
      "grad_norm": 0.2697436213493347,
      "learning_rate": 1e-06,
      "loss": -0.0712,
      "num_tokens": 549145948.0,
      "reward": 0.7109375596046448,
      "reward_std": 0.13947898149490356,
      "rewards/verify_math_reward/mean": 0.7109375,
      "rewards/verify_math_reward/std": 0.45358020067214966,
      "step": 983
    },
    {
      "clip_ratio/high_max": 0.0017626728586037643,
      "clip_ratio/high_mean": 0.00068703516444657,
      "clip_ratio/low_mean": 0.00044360163337842096,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011306368105579168,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3812.0,
      "completions/mean_length": 919.1451416015625,
      "completions/mean_terminated_length": 564.409423828125,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 9.19591836734694,
      "grad_norm": 0.2808445692062378,
      "learning_rate": 1e-06,
      "loss": -0.0214,
      "num_tokens": 549690614.0,
      "reward": 0.6328125,
      "reward_std": 0.1382436603307724,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 984
    },
    {
      "clip_ratio/high_max": 0.0019384068218641914,
      "clip_ratio/high_mean": 0.0005926047469984042,
      "clip_ratio/low_mean": 0.0002483020130057412,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000840906759549398,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2166.0,
      "completions/mean_length": 776.4230346679688,
      "completions/mean_terminated_length": 508.1339111328125,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 9.205247813411079,
      "grad_norm": 0.18402858078479767,
      "learning_rate": 1e-06,
      "loss": -0.0342,
      "num_tokens": 550183529.0,
      "reward": 0.7243303656578064,
      "reward_std": 0.10742456465959549,
      "rewards/verify_math_reward/mean": 0.7243303656578064,
      "rewards/verify_math_reward/std": 0.4471006691455841,
      "step": 985
    },
    {
      "clip_ratio/high_max": 0.001789521276805317,
      "clip_ratio/high_mean": 0.0006327138380584074,
      "clip_ratio/low_mean": 0.00034425806279614335,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000976971881755162,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3967.0,
      "completions/mean_length": 803.5491333007812,
      "completions/mean_terminated_length": 511.5091247558594,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 9.214577259475218,
      "grad_norm": 0.829149603843689,
      "learning_rate": 1e-06,
      "loss": -0.0495,
      "num_tokens": 550690085.0,
      "reward": 0.7243303656578064,
      "reward_std": 0.11404222995042801,
      "rewards/verify_math_reward/mean": 0.7243303656578064,
      "rewards/verify_math_reward/std": 0.4471006691455841,
      "step": 986
    },
    {
      "clip_ratio/high_max": 0.001902834716020152,
      "clip_ratio/high_mean": 0.0006656191744696116,
      "clip_ratio/low_mean": 0.00035124982059642207,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010168690223508747,
      "completions/clipped_ratio": 0.1372767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3194.0,
      "completions/mean_length": 1050.0960693359375,
      "completions/mean_terminated_length": 565.4307861328125,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 9.223906705539358,
      "grad_norm": 0.2216397374868393,
      "learning_rate": 1e-06,
      "loss": -0.0686,
      "num_tokens": 551226619.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.13466298580169678,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161848425865173,
      "step": 987
    },
    {
      "clip_ratio/high_max": 0.0016880720722838305,
      "clip_ratio/high_mean": 0.0006492141983471811,
      "clip_ratio/low_mean": 0.000368816930858884,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010180311364820227,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3736.0,
      "completions/mean_length": 912.03466796875,
      "completions/mean_terminated_length": 560.8909912109375,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 9.2332361516035,
      "grad_norm": 0.2358027994632721,
      "learning_rate": 1e-06,
      "loss": -0.0459,
      "num_tokens": 551779138.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.14774592220783234,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 988
    },
    {
      "clip_ratio/high_max": 0.0021234851628832985,
      "clip_ratio/high_mean": 0.0007095335295161931,
      "clip_ratio/low_mean": 0.0005461098162413691,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012556433612189721,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2474.0,
      "completions/mean_length": 950.5145263671875,
      "completions/mean_terminated_length": 523.9404296875,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 9.242565597667639,
      "grad_norm": 0.27929025888442993,
      "learning_rate": 1e-06,
      "loss": -0.0596,
      "num_tokens": 552276159.0,
      "reward": 0.668526828289032,
      "reward_std": 0.1533079892396927,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056483745575,
      "step": 989
    },
    {
      "clip_ratio/high_max": 0.002024388057179749,
      "clip_ratio/high_mean": 0.0007182806275523035,
      "clip_ratio/low_mean": 0.00048276128927682294,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012010418940917589,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2961.0,
      "completions/mean_length": 915.060302734375,
      "completions/mean_terminated_length": 510.9408874511719,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 9.251895043731778,
      "grad_norm": 0.2631526589393616,
      "learning_rate": 1e-06,
      "loss": -0.0418,
      "num_tokens": 552779741.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.14143106341362,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 990
    },
    {
      "clip_ratio/high_max": 0.0014596193941542879,
      "clip_ratio/high_mean": 0.0005140056018717587,
      "clip_ratio/low_mean": 0.0005408700999396387,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010548756927164504,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2163.0,
      "completions/mean_length": 839.9085083007812,
      "completions/mean_terminated_length": 525.0599975585938,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 9.261224489795918,
      "grad_norm": 0.242902934551239,
      "learning_rate": 1e-06,
      "loss": -0.0458,
      "num_tokens": 553310843.0,
      "reward": 0.6328125,
      "reward_std": 0.12192870676517487,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 991
    },
    {
      "clip_ratio/high_max": 0.0018979092492372729,
      "clip_ratio/high_mean": 0.0007676648638152983,
      "clip_ratio/low_mean": 0.000459608839264547,
      "clip_ratio/low_min": 1.3619524906971492e-05,
      "clip_ratio/region_mean": 0.0012272737149032764,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3209.0,
      "completions/mean_length": 953.8594360351562,
      "completions/mean_terminated_length": 550.2090454101562,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 9.270553935860057,
      "grad_norm": 0.2421979159116745,
      "learning_rate": 1e-06,
      "loss": -0.0724,
      "num_tokens": 553834045.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.15924307703971863,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791125416755676,
      "step": 992
    },
    {
      "clip_ratio/high_max": 0.0024087222845992073,
      "clip_ratio/high_mean": 0.0008905131344363326,
      "clip_ratio/low_mean": 0.0002782431856758194,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011687562946463004,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3689.0,
      "completions/mean_length": 875.357177734375,
      "completions/mean_terminated_length": 555.2687377929688,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 9.279883381924199,
      "grad_norm": 31.97504997253418,
      "learning_rate": 1e-06,
      "loss": -0.0541,
      "num_tokens": 554385797.0,
      "reward": 0.6975446939468384,
      "reward_std": 0.146658256649971,
      "rewards/verify_math_reward/mean": 0.6975446343421936,
      "rewards/verify_math_reward/std": 0.45957788825035095,
      "step": 993
    },
    {
      "clip_ratio/high_max": 0.0022565197723452,
      "clip_ratio/high_mean": 0.0007862259280955186,
      "clip_ratio/low_mean": 0.0004552455166049185,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012414714219630696,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3297.0,
      "completions/mean_length": 1000.0435791015625,
      "completions/mean_terminated_length": 535.053955078125,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 9.289212827988338,
      "grad_norm": 0.26120316982269287,
      "learning_rate": 1e-06,
      "loss": -0.0539,
      "num_tokens": 554885780.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.13906781375408173,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 994
    },
    {
      "clip_ratio/high_max": 0.0017992375142057426,
      "clip_ratio/high_mean": 0.0006830044858361362,
      "clip_ratio/low_mean": 0.00041874111070683284,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011017456163244788,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3475.0,
      "completions/mean_length": 850.0870971679688,
      "completions/mean_terminated_length": 553.56640625,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 9.298542274052478,
      "grad_norm": 0.23041197657585144,
      "learning_rate": 1e-06,
      "loss": -0.0442,
      "num_tokens": 555432802.0,
      "reward": 0.6595982313156128,
      "reward_std": 0.14462240040302277,
      "rewards/verify_math_reward/mean": 0.6595982313156128,
      "rewards/verify_math_reward/std": 0.4741089344024658,
      "step": 995
    },
    {
      "clip_ratio/high_max": 0.0021802876217407174,
      "clip_ratio/high_mean": 0.0007658100566914072,
      "clip_ratio/low_mean": 0.0003805052924690244,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001146315331425285,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3104.0,
      "completions/mean_length": 943.3114013671875,
      "completions/mean_terminated_length": 524.8129272460938,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 9.307871720116617,
      "grad_norm": 0.2721904218196869,
      "learning_rate": 1e-06,
      "loss": -0.0518,
      "num_tokens": 555940369.0,
      "reward": 0.660714328289032,
      "reward_std": 0.14121240377426147,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 996
    },
    {
      "clip_ratio/high_max": 0.0017383672566211317,
      "clip_ratio/high_mean": 0.0005729928307118826,
      "clip_ratio/low_mean": 0.0005022300210839603,
      "clip_ratio/low_min": 1.7071837646653876e-05,
      "clip_ratio/region_mean": 0.001075222869985737,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2191.0,
      "completions/mean_length": 825.552490234375,
      "completions/mean_terminated_length": 509.3157958984375,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 9.317201166180759,
      "grad_norm": 0.23203667998313904,
      "learning_rate": 1e-06,
      "loss": -0.0468,
      "num_tokens": 556443808.0,
      "reward": 0.6796875596046448,
      "reward_std": 0.1382322758436203,
      "rewards/verify_math_reward/mean": 0.6796875,
      "rewards/verify_math_reward/std": 0.4668572247028351,
      "step": 997
    },
    {
      "clip_ratio/high_max": 0.0016585760640737135,
      "clip_ratio/high_mean": 0.0005702862890757388,
      "clip_ratio/low_mean": 0.0004152120200160425,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009854983291006647,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3359.0,
      "completions/mean_length": 869.4163208007812,
      "completions/mean_terminated_length": 526.8407592773438,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 9.326530612244898,
      "grad_norm": 0.23990260064601898,
      "learning_rate": 1e-06,
      "loss": -0.025,
      "num_tokens": 556948261.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.134141206741333,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 998
    },
    {
      "clip_ratio/high_max": 0.001819459313992411,
      "clip_ratio/high_mean": 0.0006607770028494997,
      "clip_ratio/low_mean": 0.0005408785964391427,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012016556102025788,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2962.0,
      "completions/mean_length": 830.966552734375,
      "completions/mean_terminated_length": 457.355712890625,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 9.335860058309038,
      "grad_norm": 0.26270946860313416,
      "learning_rate": 1e-06,
      "loss": -0.0576,
      "num_tokens": 557407351.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.1371905654668808,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.46642565727233887,
      "step": 999
    },
    {
      "clip_ratio/high_max": 0.0014994931734690908,
      "clip_ratio/high_mean": 0.0004761727250297554,
      "clip_ratio/low_mean": 0.00027690992033058137,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007530826533184154,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3933.0,
      "completions/mean_length": 851.708740234375,
      "completions/mean_terminated_length": 529.2699584960938,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 9.345189504373177,
      "grad_norm": 0.19529157876968384,
      "learning_rate": 1e-06,
      "loss": -0.0333,
      "num_tokens": 557933426.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.10701198875904083,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 1000
    },
    {
      "clip_ratio/high_max": 0.0014798677548242267,
      "clip_ratio/high_mean": 0.0005599712112598354,
      "clip_ratio/low_mean": 0.00042298395828765933,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009829551727307262,
      "completions/clipped_ratio": 0.1495535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3759.0,
      "completions/mean_length": 1040.9967041015625,
      "completions/mean_terminated_length": 503.76507568359375,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 9.354518950437317,
      "grad_norm": 0.24427780508995056,
      "learning_rate": 1e-06,
      "loss": -0.05,
      "num_tokens": 558411143.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.1380895972251892,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 1001
    },
    {
      "clip_ratio/high_max": 0.0016418406812590547,
      "clip_ratio/high_mean": 0.0005652182262565475,
      "clip_ratio/low_mean": 0.0003534897705321782,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009187079886032734,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4049.0,
      "completions/mean_length": 938.0848388671875,
      "completions/mean_terminated_length": 509.8251037597656,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 9.363848396501458,
      "grad_norm": 0.2545948624610901,
      "learning_rate": 1e-06,
      "loss": -0.0307,
      "num_tokens": 558896099.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.11565801501274109,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 1002
    },
    {
      "clip_ratio/high_max": 0.0018756211575237103,
      "clip_ratio/high_mean": 0.0006141574685898377,
      "clip_ratio/low_mean": 0.00042348662009317195,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010376441168773454,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2327.0,
      "completions/mean_length": 935.9129638671875,
      "completions/mean_terminated_length": 534.4427490234375,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 9.373177842565598,
      "grad_norm": 0.20861415565013885,
      "learning_rate": 1e-06,
      "loss": -0.0588,
      "num_tokens": 559408669.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.1255386769771576,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975656390190125,
      "step": 1003
    },
    {
      "clip_ratio/high_max": 0.0017241709101654124,
      "clip_ratio/high_mean": 0.0007156978317652829,
      "clip_ratio/low_mean": 0.00039253456225196715,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011082323671871563,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3613.0,
      "completions/mean_length": 983.443115234375,
      "completions/mean_terminated_length": 538.7920532226562,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 9.382507288629737,
      "grad_norm": 0.2265206128358841,
      "learning_rate": 1e-06,
      "loss": -0.0427,
      "num_tokens": 559922538.0,
      "reward": 0.6484375,
      "reward_std": 0.1542109102010727,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 1004
    },
    {
      "clip_ratio/high_max": 0.0017527268464618828,
      "clip_ratio/high_mean": 0.0005118758144817548,
      "clip_ratio/low_mean": 0.0004175999210929149,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009294757219322491,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3894.0,
      "completions/mean_length": 953.44873046875,
      "completions/mean_terminated_length": 549.7455444335938,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 9.391836734693877,
      "grad_norm": 0.30438947677612305,
      "learning_rate": 1e-06,
      "loss": -0.031,
      "num_tokens": 560444068.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.110169418156147,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 1005
    },
    {
      "clip_ratio/high_max": 0.0017773605286492966,
      "clip_ratio/high_mean": 0.0005669130550813861,
      "clip_ratio/low_mean": 0.00047973556957003893,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010466486328368774,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4054.0,
      "completions/mean_length": 947.6473388671875,
      "completions/mean_terminated_length": 569.844970703125,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 9.401166180758018,
      "grad_norm": 0.2808428108692169,
      "learning_rate": 1e-06,
      "loss": -0.0394,
      "num_tokens": 560988552.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.14373114705085754,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159480571747,
      "step": 1006
    },
    {
      "clip_ratio/high_max": 0.0017154052256955765,
      "clip_ratio/high_mean": 0.0005592965089817881,
      "clip_ratio/low_mean": 0.00033373002133885166,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008930265339586185,
      "completions/clipped_ratio": 0.1372767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3733.0,
      "completions/mean_length": 1008.8761596679688,
      "completions/mean_terminated_length": 517.6520385742188,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 9.410495626822158,
      "grad_norm": 0.28805091977119446,
      "learning_rate": 1e-06,
      "loss": -0.0717,
      "num_tokens": 561477145.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.11967099457979202,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644899368286,
      "step": 1007
    },
    {
      "clip_ratio/high_max": 0.002020704618189484,
      "clip_ratio/high_mean": 0.0008039077965804609,
      "clip_ratio/low_mean": 0.00048153305897358223,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012854408487328328,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3521.0,
      "completions/mean_length": 982.1920166015625,
      "completions/mean_terminated_length": 546.4172973632812,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 9.419825072886297,
      "grad_norm": 0.31485286355018616,
      "learning_rate": 1e-06,
      "loss": -0.0505,
      "num_tokens": 561995213.0,
      "reward": 0.637276828289032,
      "reward_std": 0.1576700508594513,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 1008
    },
    {
      "clip_ratio/high_max": 0.0016207741209655069,
      "clip_ratio/high_mean": 0.0005247285771474708,
      "clip_ratio/low_mean": 0.0003002406610903563,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008249692455137847,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3270.0,
      "completions/mean_length": 974.9063110351562,
      "completions/mean_terminated_length": 560.601806640625,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 9.429154518950437,
      "grad_norm": 0.6327840089797974,
      "learning_rate": 1e-06,
      "loss": -0.0643,
      "num_tokens": 562526481.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.11794712394475937,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 1009
    },
    {
      "clip_ratio/high_max": 0.002112652495270595,
      "clip_ratio/high_mean": 0.0007039539596007671,
      "clip_ratio/low_mean": 0.00043457348101583193,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011385274337953888,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3177.0,
      "completions/mean_length": 907.7131958007812,
      "completions/mean_terminated_length": 493.59771728515625,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 9.438483965014576,
      "grad_norm": 0.25927993655204773,
      "learning_rate": 1e-06,
      "loss": -0.0439,
      "num_tokens": 563015184.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.13511762022972107,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 1010
    },
    {
      "clip_ratio/high_max": 0.0019028616297873668,
      "clip_ratio/high_mean": 0.0007404392126773018,
      "clip_ratio/low_mean": 0.0003135628901418386,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010540021012275247,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2398.0,
      "completions/mean_length": 920.3750610351562,
      "completions/mean_terminated_length": 516.9307861328125,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 9.447813411078718,
      "grad_norm": 0.36331212520599365,
      "learning_rate": 1e-06,
      "loss": -0.0485,
      "num_tokens": 563516632.0,
      "reward": 0.7120535969734192,
      "reward_std": 0.1408672332763672,
      "rewards/verify_math_reward/mean": 0.7120535969734192,
      "rewards/verify_math_reward/std": 0.4530589282512665,
      "step": 1011
    },
    {
      "clip_ratio/high_max": 0.0024156742329068948,
      "clip_ratio/high_mean": 0.0006409119605450542,
      "clip_ratio/low_mean": 0.00046294131243485026,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001103853246604558,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3469.0,
      "completions/mean_length": 935.9553833007812,
      "completions/mean_terminated_length": 493.7099304199219,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 9.457142857142857,
      "grad_norm": 0.21210302412509918,
      "learning_rate": 1e-06,
      "loss": -0.0552,
      "num_tokens": 563994424.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.11306330561637878,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 1012
    },
    {
      "clip_ratio/high_max": 0.0017442624230170622,
      "clip_ratio/high_mean": 0.0006475191639765399,
      "clip_ratio/low_mean": 0.0003780283554988273,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010255475135636516,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3601.0,
      "completions/mean_length": 867.5848388671875,
      "completions/mean_terminated_length": 542.3636474609375,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 9.466472303206997,
      "grad_norm": 0.23791077733039856,
      "learning_rate": 1e-06,
      "loss": -0.0416,
      "num_tokens": 564542748.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.1381983608007431,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 1013
    },
    {
      "clip_ratio/high_max": 0.0013634544957312755,
      "clip_ratio/high_mean": 0.0004106922224309528,
      "clip_ratio/low_mean": 0.0003951218922111366,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008058141083893133,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4086.0,
      "completions/mean_length": 962.7332763671875,
      "completions/mean_terminated_length": 582.3491821289062,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 9.475801749271136,
      "grad_norm": 0.2896384596824646,
      "learning_rate": 1e-06,
      "loss": -0.0506,
      "num_tokens": 565091525.0,
      "reward": 0.637276828289032,
      "reward_std": 0.1061122789978981,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 1014
    },
    {
      "clip_ratio/high_max": 0.0016935610274231294,
      "clip_ratio/high_mean": 0.0006141749763628468,
      "clip_ratio/low_mean": 0.0005587855757767102,
      "clip_ratio/low_min": 2.942099490610417e-05,
      "clip_ratio/region_mean": 0.0011729605448635994,
      "completions/clipped_ratio": 0.1540178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3707.0,
      "completions/mean_length": 1130.489990234375,
      "completions/mean_terminated_length": 590.5950317382812,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 9.485131195335278,
      "grad_norm": 0.22907555103302002,
      "learning_rate": 1e-06,
      "loss": -0.0671,
      "num_tokens": 565629284.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.16352775692939758,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 1015
    },
    {
      "clip_ratio/high_max": 0.001559922326123342,
      "clip_ratio/high_mean": 0.0005506666593646514,
      "clip_ratio/low_mean": 0.0003646592513177893,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009153259288723348,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3843.0,
      "completions/mean_length": 807.7064819335938,
      "completions/mean_terminated_length": 507.31427001953125,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 9.494460641399417,
      "grad_norm": 0.2359188050031662,
      "learning_rate": 1e-06,
      "loss": -0.0217,
      "num_tokens": 566125861.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.1270803064107895,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.4610042870044708,
      "step": 1016
    },
    {
      "clip_ratio/high_max": 0.001428876810678048,
      "clip_ratio/high_mean": 0.0005704977638743003,
      "clip_ratio/low_mean": 0.00041258256078435807,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009830803282966372,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1877.0,
      "completions/mean_length": 947.3482666015625,
      "completions/mean_terminated_length": 524.8709106445312,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 9.503790087463557,
      "grad_norm": 0.26440975069999695,
      "learning_rate": 1e-06,
      "loss": -0.0509,
      "num_tokens": 566622821.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.14056415855884552,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 1017
    },
    {
      "clip_ratio/high_max": 0.0027742850725189783,
      "clip_ratio/high_mean": 0.001015650592307793,
      "clip_ratio/low_mean": 0.0005431211029645056,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001558771666168468,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3354.0,
      "completions/mean_length": 922.216552734375,
      "completions/mean_terminated_length": 541.3624877929688,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 9.513119533527696,
      "grad_norm": 0.31896474957466125,
      "learning_rate": 1e-06,
      "loss": -0.0804,
      "num_tokens": 567149303.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.19448629021644592,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.4664256274700165,
      "step": 1018
    },
    {
      "clip_ratio/high_max": 0.002351549257582519,
      "clip_ratio/high_mean": 0.0008312666177516803,
      "clip_ratio/low_mean": 0.00045399160035231034,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012852582221967168,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3881.0,
      "completions/mean_length": 924.6942138671875,
      "completions/mean_terminated_length": 535.235595703125,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 9.522448979591836,
      "grad_norm": 0.33340534567832947,
      "learning_rate": 1e-06,
      "loss": -0.0641,
      "num_tokens": 567657621.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.14458851516246796,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.4610042870044708,
      "step": 1019
    },
    {
      "clip_ratio/high_max": 0.0013591579408966936,
      "clip_ratio/high_mean": 0.00044431955302570714,
      "clip_ratio/low_mean": 0.0003737329818704893,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008180525419447804,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3327.0,
      "completions/mean_length": 1015.2891235351562,
      "completions/mean_terminated_length": 557.132080078125,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 9.531778425655977,
      "grad_norm": 0.24508723616600037,
      "learning_rate": 1e-06,
      "loss": -0.0641,
      "num_tokens": 568177032.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.134664386510849,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 1020
    },
    {
      "clip_ratio/high_max": 0.0019166687125107273,
      "clip_ratio/high_mean": 0.0006429423883673735,
      "clip_ratio/low_mean": 0.00033448019394199946,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009774225909495726,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3983.0,
      "completions/mean_length": 914.7913208007812,
      "completions/mean_terminated_length": 515.1419677734375,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 9.541107871720117,
      "grad_norm": 0.17712129652500153,
      "learning_rate": 1e-06,
      "loss": -0.0671,
      "num_tokens": 568674877.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.12050722539424896,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 1021
    },
    {
      "clip_ratio/high_max": 0.0019794206964434125,
      "clip_ratio/high_mean": 0.0005501120795088354,
      "clip_ratio/low_mean": 0.0003695714331115596,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009196835089824162,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3052.0,
      "completions/mean_length": 955.3906860351562,
      "completions/mean_terminated_length": 529.4778442382812,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 9.550437317784256,
      "grad_norm": 0.23466232419013977,
      "learning_rate": 1e-06,
      "loss": -0.07,
      "num_tokens": 569180787.0,
      "reward": 0.6796875596046448,
      "reward_std": 0.11937858909368515,
      "rewards/verify_math_reward/mean": 0.6796875,
      "rewards/verify_math_reward/std": 0.4668572247028351,
      "step": 1022
    },
    {
      "clip_ratio/high_max": 0.001762232495821081,
      "clip_ratio/high_mean": 0.0005917552807659376,
      "clip_ratio/low_mean": 0.0004402920067150262,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010320473120373208,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2850.0,
      "completions/mean_length": 1067.9320068359375,
      "completions/mean_terminated_length": 613.138671875,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 9.559766763848396,
      "grad_norm": 19.286802291870117,
      "learning_rate": 1e-06,
      "loss": -0.066,
      "num_tokens": 569759566.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.14981746673583984,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 1023
    },
    {
      "clip_ratio/high_max": 0.0021669982379535213,
      "clip_ratio/high_mean": 0.0006441391160478815,
      "clip_ratio/low_mean": 0.0005867732265869563,
      "clip_ratio/low_min": 1.822157355491072e-05,
      "clip_ratio/region_mean": 0.0012309123158047441,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3607.0,
      "completions/mean_length": 841.3928833007812,
      "completions/mean_terminated_length": 535.4041748046875,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 9.569096209912537,
      "grad_norm": 0.2259806990623474,
      "learning_rate": 1e-06,
      "loss": -0.0246,
      "num_tokens": 570297430.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.13339193165302277,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 1024
    },
    {
      "clip_ratio/high_max": 0.0018299978873983491,
      "clip_ratio/high_mean": 0.0007017430471023545,
      "clip_ratio/low_mean": 0.0004816478376596933,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011833908647531644,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2896.0,
      "completions/mean_length": 965.9063110351562,
      "completions/mean_terminated_length": 523.3070068359375,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 9.578425655976677,
      "grad_norm": 0.3631192147731781,
      "learning_rate": 1e-06,
      "loss": -0.0409,
      "num_tokens": 570791330.0,
      "reward": 0.668526828289032,
      "reward_std": 0.14312496781349182,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056483745575,
      "step": 1025
    },
    {
      "clip_ratio/high_max": 0.0014764480984013062,
      "clip_ratio/high_mean": 0.0005153620877536014,
      "clip_ratio/low_mean": 0.00024874671225916245,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007641087759111542,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2611.0,
      "completions/mean_length": 915.0256958007812,
      "completions/mean_terminated_length": 483.6387939453125,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 9.587755102040816,
      "grad_norm": 0.18286962807178497,
      "learning_rate": 1e-06,
      "loss": -0.0731,
      "num_tokens": 571270201.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.09014318883419037,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 1026
    },
    {
      "clip_ratio/high_max": 0.0013748594756179955,
      "clip_ratio/high_mean": 0.0004334319114605023,
      "clip_ratio/low_mean": 0.0002134787928298465,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006469107211160008,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1857.0,
      "completions/mean_length": 798.8560791015625,
      "completions/mean_terminated_length": 515.101806640625,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 9.597084548104956,
      "grad_norm": 0.18646889925003052,
      "learning_rate": 1e-06,
      "loss": -0.0442,
      "num_tokens": 571781144.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.08683561533689499,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 1027
    },
    {
      "clip_ratio/high_max": 0.0016353268656530418,
      "clip_ratio/high_mean": 0.0005685481555701699,
      "clip_ratio/low_mean": 0.0005310405799718865,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010995887278113514,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2253.0,
      "completions/mean_length": 976.1060791015625,
      "completions/mean_terminated_length": 553.0025024414062,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 9.606413994169095,
      "grad_norm": 0.25454413890838623,
      "learning_rate": 1e-06,
      "loss": -0.0431,
      "num_tokens": 572311895.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.11892351508140564,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 1028
    },
    {
      "clip_ratio/high_max": 0.0025896687766362447,
      "clip_ratio/high_mean": 0.0008289245524792932,
      "clip_ratio/low_mean": 0.000477146599223488,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013060711607977282,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3626.0,
      "completions/mean_length": 844.107177734375,
      "completions/mean_terminated_length": 516.5208740234375,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 9.615743440233237,
      "grad_norm": 0.2483367770910263,
      "learning_rate": 1e-06,
      "loss": -0.0663,
      "num_tokens": 572827111.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.14229939877986908,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1029
    },
    {
      "clip_ratio/high_max": 0.0017741328192641959,
      "clip_ratio/high_mean": 0.0006298645785136614,
      "clip_ratio/low_mean": 0.00039479202087022713,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00102465658710571,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4030.0,
      "completions/mean_length": 833.6998291015625,
      "completions/mean_terminated_length": 505.0650939941406,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 9.625072886297376,
      "grad_norm": 0.2444959580898285,
      "learning_rate": 1e-06,
      "loss": -0.0477,
      "num_tokens": 573323586.0,
      "reward": 0.7020089626312256,
      "reward_std": 0.13373075425624847,
      "rewards/verify_math_reward/mean": 0.7020089030265808,
      "rewards/verify_math_reward/std": 0.45763099193573,
      "step": 1030
    },
    {
      "clip_ratio/high_max": 0.0019861087275785394,
      "clip_ratio/high_mean": 0.0006300596960500116,
      "clip_ratio/low_mean": 0.0004409523812682892,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010710120586736593,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3505.0,
      "completions/mean_length": 940.4420166015625,
      "completions/mean_terminated_length": 535.0679931640625,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 9.634402332361516,
      "grad_norm": 0.2334720492362976,
      "learning_rate": 1e-06,
      "loss": -0.0682,
      "num_tokens": 573838094.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.13989083468914032,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159480571747,
      "step": 1031
    },
    {
      "clip_ratio/high_max": 0.0019386091371416114,
      "clip_ratio/high_mean": 0.0007693026418564841,
      "clip_ratio/low_mean": 0.00048742640728960396,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012567290759761818,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2034.0,
      "completions/mean_length": 1007.23779296875,
      "completions/mean_terminated_length": 534.1840209960938,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 9.643731778425655,
      "grad_norm": 0.2592686712741852,
      "learning_rate": 1e-06,
      "loss": -0.0581,
      "num_tokens": 574347395.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.16180318593978882,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 1032
    },
    {
      "clip_ratio/high_max": 0.0020356640306999907,
      "clip_ratio/high_mean": 0.0006960072187212063,
      "clip_ratio/low_mean": 0.0004890329191766796,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011850401460833382,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2936.0,
      "completions/mean_length": 911.08935546875,
      "completions/mean_terminated_length": 515.4730224609375,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 9.653061224489797,
      "grad_norm": 0.326799213886261,
      "learning_rate": 1e-06,
      "loss": -0.0304,
      "num_tokens": 574842171.0,
      "reward": 0.65625,
      "reward_std": 0.12971457839012146,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 1033
    },
    {
      "clip_ratio/high_max": 0.0018600934199639596,
      "clip_ratio/high_mean": 0.000609039145274437,
      "clip_ratio/low_mean": 0.0002478077349223895,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008568468692828901,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1869.0,
      "completions/mean_length": 975.6328735351562,
      "completions/mean_terminated_length": 529.8660888671875,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 9.662390670553936,
      "grad_norm": 0.2780069410800934,
      "learning_rate": 1e-06,
      "loss": -0.0528,
      "num_tokens": 575347210.0,
      "reward": 0.6640625,
      "reward_std": 0.12602722644805908,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 1034
    },
    {
      "clip_ratio/high_max": 0.0019211800281482283,
      "clip_ratio/high_mean": 0.0006911874461366097,
      "clip_ratio/low_mean": 0.0005047573258707416,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011959447656408884,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3855.0,
      "completions/mean_length": 869.3192138671875,
      "completions/mean_terminated_length": 517.8984985351562,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 9.671720116618076,
      "grad_norm": 0.2505824565887451,
      "learning_rate": 1e-06,
      "loss": -0.0279,
      "num_tokens": 575850784.0,
      "reward": 0.637276828289032,
      "reward_std": 0.14011836051940918,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 1035
    },
    {
      "clip_ratio/high_max": 0.002050023802439682,
      "clip_ratio/high_mean": 0.0006352996188070392,
      "clip_ratio/low_mean": 0.0004639841963580693,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010992838651873171,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3891.0,
      "completions/mean_length": 899.2489013671875,
      "completions/mean_terminated_length": 524.5673217773438,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 9.681049562682215,
      "grad_norm": 0.2341313511133194,
      "learning_rate": 1e-06,
      "loss": -0.0663,
      "num_tokens": 576360479.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.14800554513931274,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 1036
    },
    {
      "clip_ratio/high_max": 0.0021974491100991145,
      "clip_ratio/high_mean": 0.00077992530168558,
      "clip_ratio/low_mean": 0.00038363828616638784,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001163563625595998,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3857.0,
      "completions/mean_length": 977.1641235351562,
      "completions/mean_terminated_length": 563.1593017578125,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 9.690379008746355,
      "grad_norm": 0.26642727851867676,
      "learning_rate": 1e-06,
      "loss": -0.0462,
      "num_tokens": 576885210.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.1529289036989212,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 1037
    },
    {
      "clip_ratio/high_max": 0.0017975972768908832,
      "clip_ratio/high_mean": 0.0007029525049802032,
      "clip_ratio/low_mean": 0.0003928080700461578,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001095760573662119,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2652.0,
      "completions/mean_length": 942.3638916015625,
      "completions/mean_terminated_length": 563.927490234375,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 9.699708454810496,
      "grad_norm": 0.4081231951713562,
      "learning_rate": 1e-06,
      "loss": -0.0574,
      "num_tokens": 577420016.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.1612718552350998,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199838399887085,
      "step": 1038
    },
    {
      "clip_ratio/high_max": 0.001977262072614394,
      "clip_ratio/high_mean": 0.0006768327257304918,
      "clip_ratio/low_mean": 0.0005148207956153783,
      "clip_ratio/low_min": 1.3133010725141503e-05,
      "clip_ratio/region_mean": 0.0011916535222553648,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4071.0,
      "completions/mean_length": 833.1574096679688,
      "completions/mean_terminated_length": 517.6560668945312,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 9.709037900874636,
      "grad_norm": 0.2300080955028534,
      "learning_rate": 1e-06,
      "loss": -0.0546,
      "num_tokens": 577932253.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.14117145538330078,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 1039
    },
    {
      "clip_ratio/high_max": 0.002007761017011944,
      "clip_ratio/high_mean": 0.0007245595497806789,
      "clip_ratio/low_mean": 0.00047620994223507296,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001200769496790599,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1806.0,
      "completions/mean_length": 992.5123291015625,
      "completions/mean_terminated_length": 512.5914916992188,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 9.718367346938775,
      "grad_norm": 0.3068905472755432,
      "learning_rate": 1e-06,
      "loss": -0.07,
      "num_tokens": 578415184.0,
      "reward": 0.6171875,
      "reward_std": 0.13873080909252167,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 1040
    },
    {
      "clip_ratio/high_max": 0.0017484277159383055,
      "clip_ratio/high_mean": 0.0006192948048919789,
      "clip_ratio/low_mean": 0.00036504063518805197,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009843354600889143,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3675.0,
      "completions/mean_length": 846.3895263671875,
      "completions/mean_terminated_length": 483.52978515625,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 9.727696793002915,
      "grad_norm": 285.1891784667969,
      "learning_rate": 1e-06,
      "loss": 4276.3335,
      "num_tokens": 578895469.0,
      "reward": 0.699776828289032,
      "reward_std": 0.13534586131572723,
      "rewards/verify_math_reward/mean": 0.6997767686843872,
      "rewards/verify_math_reward/std": 0.4586109220981598,
      "step": 1041
    },
    {
      "clip_ratio/high_max": 0.0017575875244801864,
      "clip_ratio/high_mean": 0.0006908914892846951,
      "clip_ratio/low_mean": 0.0003417383965143017,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010326299016014673,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2526.0,
      "completions/mean_length": 823.8717041015625,
      "completions/mean_terminated_length": 511.85943603515625,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 9.737026239067056,
      "grad_norm": 0.27488529682159424,
      "learning_rate": 1e-06,
      "loss": -0.0782,
      "num_tokens": 579396554.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.15097612142562866,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613667368888855,
      "step": 1042
    },
    {
      "clip_ratio/high_max": 0.0018153538767364807,
      "clip_ratio/high_mean": 0.0006185945339893806,
      "clip_ratio/low_mean": 0.0003676805572467856,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009862750848697033,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3552.0,
      "completions/mean_length": 974.7020263671875,
      "completions/mean_terminated_length": 524.2464599609375,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 9.746355685131196,
      "grad_norm": 0.21488317847251892,
      "learning_rate": 1e-06,
      "loss": -0.0482,
      "num_tokens": 579893967.0,
      "reward": 0.637276828289032,
      "reward_std": 0.13380491733551025,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 1043
    },
    {
      "clip_ratio/high_max": 0.0021967298671370372,
      "clip_ratio/high_mean": 0.0008200539982681221,
      "clip_ratio/low_mean": 0.0004588539322867291,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001278907944652019,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2737.0,
      "completions/mean_length": 878.49560546875,
      "completions/mean_terminated_length": 558.718994140625,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 9.755685131195335,
      "grad_norm": 0.2162492871284485,
      "learning_rate": 1e-06,
      "loss": -0.061,
      "num_tokens": 580435051.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.1738969385623932,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422144770622253,
      "step": 1044
    },
    {
      "clip_ratio/high_max": 0.001430839121894678,
      "clip_ratio/high_mean": 0.0005467789214890217,
      "clip_ratio/low_mean": 0.0002811250105878571,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008279039466287941,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3511.0,
      "completions/mean_length": 915.7645263671875,
      "completions/mean_terminated_length": 502.69482421875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 9.765014577259475,
      "grad_norm": 0.33657416701316833,
      "learning_rate": 1e-06,
      "loss": -0.0416,
      "num_tokens": 580908656.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.12523627281188965,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1045
    },
    {
      "clip_ratio/high_max": 0.001537170282972511,
      "clip_ratio/high_mean": 0.0005386366037782864,
      "clip_ratio/low_mean": 0.0003947968293687154,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009334334226878127,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3805.0,
      "completions/mean_length": 1052.7210693359375,
      "completions/mean_terminated_length": 617.966796875,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 9.774344023323614,
      "grad_norm": 0.2501138746738434,
      "learning_rate": 1e-06,
      "loss": -0.0503,
      "num_tokens": 581487438.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.14368626475334167,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.49075525999069214,
      "step": 1046
    },
    {
      "clip_ratio/high_max": 0.001727103088342119,
      "clip_ratio/high_mean": 0.000598639273448498,
      "clip_ratio/low_mean": 0.0003986700412497157,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009973093328881077,
      "completions/clipped_ratio": 0.1573660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2466.0,
      "completions/mean_length": 1109.1942138671875,
      "completions/mean_terminated_length": 551.3933715820312,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 9.783673469387756,
      "grad_norm": 0.2089390605688095,
      "learning_rate": 1e-06,
      "loss": -0.062,
      "num_tokens": 582001148.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.13110283017158508,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.49075525999069214,
      "step": 1047
    },
    {
      "clip_ratio/high_max": 0.0013950355314591434,
      "clip_ratio/high_mean": 0.0005012010387872579,
      "clip_ratio/low_mean": 0.00040065501434582984,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009018560567710665,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3303.0,
      "completions/mean_length": 962.224365234375,
      "completions/mean_terminated_length": 555.1891479492188,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 9.793002915451895,
      "grad_norm": 0.20835205912590027,
      "learning_rate": 1e-06,
      "loss": -0.0275,
      "num_tokens": 582534837.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.10434380918741226,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 1048
    },
    {
      "clip_ratio/high_max": 0.0016963560519798193,
      "clip_ratio/high_mean": 0.0006850205991213443,
      "clip_ratio/low_mean": 0.00036972204134144704,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010547426354605705,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3177.0,
      "completions/mean_length": 896.427490234375,
      "completions/mean_terminated_length": 547.9591674804688,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 9.802332361516035,
      "grad_norm": 0.2542431354522705,
      "learning_rate": 1e-06,
      "loss": -0.0307,
      "num_tokens": 583065428.0,
      "reward": 0.625,
      "reward_std": 0.14147454500198364,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1049
    },
    {
      "clip_ratio/high_max": 0.0015421499847434461,
      "clip_ratio/high_mean": 0.0005322884244378656,
      "clip_ratio/low_mean": 0.0003362369741353177,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008685253978910623,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3959.0,
      "completions/mean_length": 919.0402221679688,
      "completions/mean_terminated_length": 528.88720703125,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 9.811661807580174,
      "grad_norm": 0.2551579773426056,
      "learning_rate": 1e-06,
      "loss": -0.037,
      "num_tokens": 583575320.0,
      "reward": 0.715401828289032,
      "reward_std": 0.10818270593881607,
      "rewards/verify_math_reward/mean": 0.7154017686843872,
      "rewards/verify_math_reward/std": 0.4514748752117157,
      "step": 1050
    },
    {
      "clip_ratio/high_max": 0.002346133565879427,
      "clip_ratio/high_mean": 0.0007620485121151432,
      "clip_ratio/low_mean": 0.00047810708201723173,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012401555977703538,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3246.0,
      "completions/mean_length": 954.54248046875,
      "completions/mean_terminated_length": 519.448486328125,
      "completions/min_length": 79.0,
      "completions/min_terminated_length": 79.0,
      "epoch": 9.820991253644316,
      "grad_norm": 0.2352941483259201,
      "learning_rate": 1e-06,
      "loss": -0.0785,
      "num_tokens": 584064694.0,
      "reward": 0.6640625,
      "reward_std": 0.13989335298538208,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 1051
    },
    {
      "clip_ratio/high_max": 0.0018258053314639255,
      "clip_ratio/high_mean": 0.000634061367236427,
      "clip_ratio/low_mean": 0.0002967467048620165,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009308080552727915,
      "completions/clipped_ratio": 0.1551339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3250.0,
      "completions/mean_length": 1112.204345703125,
      "completions/mean_terminated_length": 564.321044921875,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 9.830320699708455,
      "grad_norm": 0.1994365155696869,
      "learning_rate": 1e-06,
      "loss": -0.0779,
      "num_tokens": 584586133.0,
      "reward": 0.6171875,
      "reward_std": 0.126701220870018,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 1052
    },
    {
      "clip_ratio/high_max": 0.001893172120617237,
      "clip_ratio/high_mean": 0.0007289749746632879,
      "clip_ratio/low_mean": 0.0004387708750073216,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011677458642225247,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3258.0,
      "completions/mean_length": 984.044677734375,
      "completions/mean_terminated_length": 507.4388732910156,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 9.839650145772595,
      "grad_norm": 0.287319153547287,
      "learning_rate": 1e-06,
      "loss": -0.0876,
      "num_tokens": 585063253.0,
      "reward": 0.640625,
      "reward_std": 0.15353761613368988,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 1053
    },
    {
      "clip_ratio/high_max": 0.001670144782110583,
      "clip_ratio/high_mean": 0.0005264264609650127,
      "clip_ratio/low_mean": 0.0002991365536217927,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008255630054918583,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3909.0,
      "completions/mean_length": 1017.6551513671875,
      "completions/mean_terminated_length": 555.3106689453125,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 9.848979591836734,
      "grad_norm": 0.22084204852581024,
      "learning_rate": 1e-06,
      "loss": -0.0572,
      "num_tokens": 585585896.0,
      "reward": 0.613839328289032,
      "reward_std": 0.1277529001235962,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 1054
    },
    {
      "clip_ratio/high_max": 0.0020957619417458773,
      "clip_ratio/high_mean": 0.0008615199967607623,
      "clip_ratio/low_mean": 0.0003166778906233958,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001178197893750621,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1949.0,
      "completions/mean_length": 947.6082763671875,
      "completions/mean_terminated_length": 529.68017578125,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 9.858309037900874,
      "grad_norm": 0.24934609234333038,
      "learning_rate": 1e-06,
      "loss": -0.0474,
      "num_tokens": 586107025.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.14316701889038086,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 1055
    },
    {
      "clip_ratio/high_max": 0.0015364606842922512,
      "clip_ratio/high_mean": 0.0006110137910582125,
      "clip_ratio/low_mean": 0.0004378621529212978,
      "clip_ratio/low_min": 3.377717985131312e-05,
      "clip_ratio/region_mean": 0.0010488759216968901,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3396.0,
      "completions/mean_length": 1020.7489013671875,
      "completions/mean_terminated_length": 558.8690795898438,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 9.867638483965015,
      "grad_norm": 0.8965012431144714,
      "learning_rate": 1e-06,
      "loss": -0.0467,
      "num_tokens": 586636592.0,
      "reward": 0.6015625,
      "reward_std": 0.15491561591625214,
      "rewards/verify_math_reward/mean": 0.6015625,
      "rewards/verify_math_reward/std": 0.48984986543655396,
      "step": 1056
    },
    {
      "clip_ratio/high_max": 0.0020013274006487336,
      "clip_ratio/high_mean": 0.0007237715963128721,
      "clip_ratio/low_mean": 0.0003563764864793484,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010801480912050465,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1985.0,
      "completions/mean_length": 1004.0324096679688,
      "completions/mean_terminated_length": 521.2864990234375,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 9.876967930029155,
      "grad_norm": 0.25626060366630554,
      "learning_rate": 1e-06,
      "loss": -0.0494,
      "num_tokens": 587127693.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.13981668651103973,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 1057
    },
    {
      "clip_ratio/high_max": 0.0024253955562016927,
      "clip_ratio/high_mean": 0.0009168129126919666,
      "clip_ratio/low_mean": 0.0003930366920030792,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013098496492602862,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4070.0,
      "completions/mean_length": 959.0770263671875,
      "completions/mean_terminated_length": 542.6713256835938,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 9.886297376093294,
      "grad_norm": 0.31020262837409973,
      "learning_rate": 1e-06,
      "loss": -0.0591,
      "num_tokens": 587650794.0,
      "reward": 0.652901828289032,
      "reward_std": 0.17055658996105194,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631320357322693,
      "step": 1058
    },
    {
      "clip_ratio/high_max": 0.001750830779201351,
      "clip_ratio/high_mean": 0.0005940463465776702,
      "clip_ratio/low_mean": 0.0003966069361922564,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009906532814056845,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3746.0,
      "completions/mean_length": 958.6585083007812,
      "completions/mean_terminated_length": 546.684326171875,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 9.895626822157434,
      "grad_norm": 0.3441726267337799,
      "learning_rate": 1e-06,
      "loss": -0.064,
      "num_tokens": 588170280.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.15503397583961487,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 1059
    },
    {
      "clip_ratio/high_max": 0.0015743103140266612,
      "clip_ratio/high_mean": 0.0005436696628748905,
      "clip_ratio/low_mean": 0.00041004702279678895,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009537166752124904,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3171.0,
      "completions/mean_length": 880.1082763671875,
      "completions/mean_terminated_length": 525.443603515625,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 9.904956268221575,
      "grad_norm": 0.2554823160171509,
      "learning_rate": 1e-06,
      "loss": -0.046,
      "num_tokens": 588680497.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.1368863582611084,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111400604248,
      "step": 1060
    },
    {
      "clip_ratio/high_max": 0.0015315202963392949,
      "clip_ratio/high_mean": 0.0005128437583152845,
      "clip_ratio/low_mean": 0.0002542623176395864,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007671060775464866,
      "completions/clipped_ratio": 0.0725446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2751.0,
      "completions/mean_length": 740.3248291015625,
      "completions/mean_terminated_length": 477.8471984863281,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 9.914285714285715,
      "grad_norm": 0.27816301584243774,
      "learning_rate": 1e-06,
      "loss": -0.0438,
      "num_tokens": 589158212.0,
      "reward": 0.7109375596046448,
      "reward_std": 0.09841014444828033,
      "rewards/verify_math_reward/mean": 0.7109375,
      "rewards/verify_math_reward/std": 0.45358020067214966,
      "step": 1061
    },
    {
      "clip_ratio/high_max": 0.0016542789126106072,
      "clip_ratio/high_mean": 0.0005652393183481763,
      "clip_ratio/low_mean": 0.0003445334227762942,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009097727306652814,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4055.0,
      "completions/mean_length": 918.325927734375,
      "completions/mean_terminated_length": 523.6085205078125,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 9.923615160349854,
      "grad_norm": 0.30398622155189514,
      "learning_rate": 1e-06,
      "loss": -0.0484,
      "num_tokens": 589664384.0,
      "reward": 0.6953125596046448,
      "reward_std": 0.12140554934740067,
      "rewards/verify_math_reward/mean": 0.6953125,
      "rewards/verify_math_reward/std": 0.4605320394039154,
      "step": 1062
    },
    {
      "clip_ratio/high_max": 0.002037021920841653,
      "clip_ratio/high_mean": 0.0006436750572902383,
      "clip_ratio/low_mean": 0.0003814122860603675,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010250873529003002,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2636.0,
      "completions/mean_length": 896.7879638671875,
      "completions/mean_terminated_length": 481.25347900390625,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 9.932944606413994,
      "grad_norm": 0.306455135345459,
      "learning_rate": 1e-06,
      "loss": -0.0367,
      "num_tokens": 590141618.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.14267736673355103,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 1063
    },
    {
      "clip_ratio/high_max": 0.0013350238041311968,
      "clip_ratio/high_mean": 0.0003894673327522469,
      "clip_ratio/low_mean": 0.00020939956311849528,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0005988669072394259,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2947.0,
      "completions/mean_length": 987.0770263671875,
      "completions/mean_terminated_length": 596.5087890625,
      "completions/min_length": 192.0,
      "completions/min_terminated_length": 192.0,
      "epoch": 9.942274052478133,
      "grad_norm": 0.2687012255191803,
      "learning_rate": 1e-06,
      "loss": -0.0425,
      "num_tokens": 590696279.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.10803116858005524,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 1064
    },
    {
      "clip_ratio/high_max": 0.0015185796437435783,
      "clip_ratio/high_mean": 0.0005965304562778329,
      "clip_ratio/low_mean": 0.0004224512408654846,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001018981718516443,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3534.0,
      "completions/mean_length": 981.26904296875,
      "completions/mean_terminated_length": 540.842041015625,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 9.951603498542275,
      "grad_norm": 0.2318619042634964,
      "learning_rate": 1e-06,
      "loss": -0.0375,
      "num_tokens": 591212744.0,
      "reward": 0.637276828289032,
      "reward_std": 0.13365407288074493,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 1065
    },
    {
      "clip_ratio/high_max": 0.0019497639950714074,
      "clip_ratio/high_mean": 0.0007552758052042918,
      "clip_ratio/low_mean": 0.00045476459308702033,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012100404055672698,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4029.0,
      "completions/mean_length": 875.8092041015625,
      "completions/mean_terminated_length": 525.0952758789062,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 9.960932944606414,
      "grad_norm": 0.2525951862335205,
      "learning_rate": 1e-06,
      "loss": -0.04,
      "num_tokens": 591728197.0,
      "reward": 0.65625,
      "reward_std": 0.15361177921295166,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 1066
    },
    {
      "clip_ratio/high_max": 0.002424847443762701,
      "clip_ratio/high_mean": 0.0008103852305794135,
      "clip_ratio/low_mean": 0.0005829717265442014,
      "clip_ratio/low_min": 1.3742303963226732e-05,
      "clip_ratio/region_mean": 0.001393356917105848,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4032.0,
      "completions/mean_length": 959.8973388671875,
      "completions/mean_terminated_length": 552.5598754882812,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 9.970262390670554,
      "grad_norm": 0.34032052755355835,
      "learning_rate": 1e-06,
      "loss": -0.0366,
      "num_tokens": 592259161.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.15097863972187042,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 1067
    },
    {
      "clip_ratio/high_max": 0.0017289511451963335,
      "clip_ratio/high_mean": 0.0006448427611758234,
      "clip_ratio/low_mean": 0.00045808370032318635,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011029264569515362,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2139.0,
      "completions/mean_length": 849.5435791015625,
      "completions/mean_terminated_length": 531.2634887695312,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 9.979591836734693,
      "grad_norm": 0.20769155025482178,
      "learning_rate": 1e-06,
      "loss": -0.0622,
      "num_tokens": 592771616.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.13996823132038116,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 1068
    },
    {
      "clip_ratio/high_max": 0.0016924042320169974,
      "clip_ratio/high_mean": 0.0007165549150158768,
      "clip_ratio/low_mean": 0.00043740931596403243,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011539641964191105,
      "completions/clipped_ratio": 0.1428571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3103.0,
      "completions/mean_length": 1057.489990234375,
      "completions/mean_terminated_length": 551.0716552734375,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 9.988921282798835,
      "grad_norm": 0.48560819029808044,
      "learning_rate": 1e-06,
      "loss": -0.047,
      "num_tokens": 593298927.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.14324119687080383,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 1069
    },
    {
      "clip_ratio/high_max": 0.0022205749482964166,
      "clip_ratio/high_mean": 0.0007762669993098825,
      "clip_ratio/low_mean": 0.0003609112159210781,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011371782056812663,
      "completions/clipped_ratio": 0.15340909090909094,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2645.0,
      "completions/mean_length": 1100.4346923828125,
      "completions/mean_terminated_length": 557.6140747070312,
      "completions/min_length": 217.0,
      "completions/min_terminated_length": 217.0,
      "epoch": 9.998250728862974,
      "grad_norm": 0.3290969133377075,
      "learning_rate": 1e-06,
      "loss": -0.0801,
      "num_tokens": 593800012.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.1505221724510193,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 1070
    },
    {
      "clip_ratio/high_max": 0.0019547574120224454,
      "clip_ratio/high_mean": 0.0006853751092421589,
      "clip_ratio/low_mean": 0.00044320657161733834,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011285816799500026,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3782.0,
      "completions/mean_length": 979.6172485351562,
      "completions/mean_terminated_length": 534.4196166992188,
      "completions/min_length": 179.0,
      "completions/min_terminated_length": 179.0,
      "epoch": 10.00932944606414,
      "grad_norm": 0.33982887864112854,
      "learning_rate": 1e-06,
      "loss": -0.0397,
      "num_tokens": 594311501.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.12790445983409882,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 1071
    },
    {
      "clip_ratio/high_max": 0.0016944085218710825,
      "clip_ratio/high_mean": 0.000578192245484388,
      "clip_ratio/low_mean": 0.000361381742550293,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009395739762112498,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3844.0,
      "completions/mean_length": 975.099365234375,
      "completions/mean_terminated_length": 574.1775512695312,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 10.018658892128279,
      "grad_norm": 0.18213008344173431,
      "learning_rate": 1e-06,
      "loss": -0.0564,
      "num_tokens": 594860806.0,
      "reward": 0.621651828289032,
      "reward_std": 0.13233955204486847,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 1072
    },
    {
      "clip_ratio/high_max": 0.0018270213731739204,
      "clip_ratio/high_mean": 0.0006081969531805953,
      "clip_ratio/low_mean": 0.0003907687978426111,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009989657373807859,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2686.0,
      "completions/mean_length": 954.239990234375,
      "completions/mean_terminated_length": 528.1710815429688,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 10.02798833819242,
      "grad_norm": 0.261699378490448,
      "learning_rate": 1e-06,
      "loss": -0.0499,
      "num_tokens": 595368461.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.1317012757062912,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 1073
    },
    {
      "clip_ratio/high_max": 0.0021941313025308773,
      "clip_ratio/high_mean": 0.0007479084279111703,
      "clip_ratio/low_mean": 0.0003567929979908513,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001104701434087474,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2046.0,
      "completions/mean_length": 931.4576416015625,
      "completions/mean_terminated_length": 511.3856201171875,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 10.03731778425656,
      "grad_norm": 0.36655399203300476,
      "learning_rate": 1e-06,
      "loss": -0.0408,
      "num_tokens": 595858919.0,
      "reward": 0.65625,
      "reward_std": 0.13549739122390747,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 1074
    },
    {
      "clip_ratio/high_max": 0.0018448381779307965,
      "clip_ratio/high_mean": 0.000697012113960227,
      "clip_ratio/low_mean": 0.0005023614039600943,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001199373546114657,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3958.0,
      "completions/mean_length": 961.7645263671875,
      "completions/mean_terminated_length": 563.57861328125,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 10.0466472303207,
      "grad_norm": 0.5529072880744934,
      "learning_rate": 1e-06,
      "loss": -0.032,
      "num_tokens": 596402396.0,
      "reward": 0.629464328289032,
      "reward_std": 0.1545158475637436,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 1075
    },
    {
      "clip_ratio/high_max": 0.0011781354987761006,
      "clip_ratio/high_mean": 0.0003795152715611039,
      "clip_ratio/low_mean": 0.0003574213467345544,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007369366157945478,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3359.0,
      "completions/mean_length": 882.857177734375,
      "completions/mean_terminated_length": 532.910888671875,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 10.055976676384839,
      "grad_norm": 0.1720803827047348,
      "learning_rate": 1e-06,
      "loss": -0.0373,
      "num_tokens": 596909564.0,
      "reward": 0.7209821939468384,
      "reward_std": 0.10836746543645859,
      "rewards/verify_math_reward/mean": 0.7209821343421936,
      "rewards/verify_math_reward/std": 0.448766827583313,
      "step": 1076
    },
    {
      "clip_ratio/high_max": 0.0018365723990427796,
      "clip_ratio/high_mean": 0.0005781471554655582,
      "clip_ratio/low_mean": 0.000381889202799357,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009600363628123887,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3820.0,
      "completions/mean_length": 917.3069458007812,
      "completions/mean_terminated_length": 540.308349609375,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 10.06530612244898,
      "grad_norm": 0.22136181592941284,
      "learning_rate": 1e-06,
      "loss": -0.0448,
      "num_tokens": 597434695.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.11945415288209915,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 1077
    },
    {
      "clip_ratio/high_max": 0.0018467046065779869,
      "clip_ratio/high_mean": 0.0006715934341627872,
      "clip_ratio/low_mean": 0.00040892525566960103,
      "clip_ratio/low_min": 1.7944301362149417e-05,
      "clip_ratio/region_mean": 0.001080518692106125,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3786.0,
      "completions/mean_length": 965.060302734375,
      "completions/mean_terminated_length": 589.3474731445312,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 10.07463556851312,
      "grad_norm": 0.24332016706466675,
      "learning_rate": 1e-06,
      "loss": -0.0542,
      "num_tokens": 597993653.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.15751849114894867,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 1078
    },
    {
      "clip_ratio/high_max": 0.002218414934759494,
      "clip_ratio/high_mean": 0.0006293981896305922,
      "clip_ratio/low_mean": 0.00042385161577840336,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010532498054089956,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3999.0,
      "completions/mean_length": 974.58935546875,
      "completions/mean_terminated_length": 546.78173828125,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 10.08396501457726,
      "grad_norm": 0.23597940802574158,
      "learning_rate": 1e-06,
      "loss": -0.0302,
      "num_tokens": 598527301.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.1306481808423996,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.47942501306533813,
      "step": 1079
    },
    {
      "clip_ratio/high_max": 0.0019827116848318838,
      "clip_ratio/high_mean": 0.000614834561929456,
      "clip_ratio/low_mean": 0.0004315246228543401,
      "clip_ratio/low_min": 1.72986437974032e-05,
      "clip_ratio/region_mean": 0.001046359198880964,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3841.0,
      "completions/mean_length": 1004.4029541015625,
      "completions/mean_terminated_length": 571.7366333007812,
      "completions/min_length": 177.0,
      "completions/min_terminated_length": 177.0,
      "epoch": 10.093294460641399,
      "grad_norm": 0.22727219760417938,
      "learning_rate": 1e-06,
      "loss": -0.049,
      "num_tokens": 599078558.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.14266961812973022,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 1080
    },
    {
      "clip_ratio/high_max": 0.0020448147115530446,
      "clip_ratio/high_mean": 0.0006679803354927571,
      "clip_ratio/low_mean": 0.00035781238693743944,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010257927096972708,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1963.0,
      "completions/mean_length": 938.966552734375,
      "completions/mean_terminated_length": 506.2766418457031,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 10.102623906705539,
      "grad_norm": 0.3594035804271698,
      "learning_rate": 1e-06,
      "loss": -0.0771,
      "num_tokens": 599570168.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.1418525129556656,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975656390190125,
      "step": 1081
    },
    {
      "clip_ratio/high_max": 0.001946572694578208,
      "clip_ratio/high_mean": 0.0005750222899223445,
      "clip_ratio/low_mean": 0.0004999674974897061,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010749898101494182,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2290.0,
      "completions/mean_length": 899.0803833007812,
      "completions/mean_terminated_length": 510.9687194824219,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 10.11195335276968,
      "grad_norm": 1.8174324035644531,
      "learning_rate": 1e-06,
      "loss": -0.0362,
      "num_tokens": 600073704.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.13121160864830017,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.48468026518821716,
      "step": 1082
    },
    {
      "clip_ratio/high_max": 0.0021044298555352725,
      "clip_ratio/high_mean": 0.0007841647275199648,
      "clip_ratio/low_mean": 0.0005819887319375994,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013661534867424052,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3108.0,
      "completions/mean_length": 990.1250610351562,
      "completions/mean_terminated_length": 573.3873291015625,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 10.12128279883382,
      "grad_norm": 0.5238993763923645,
      "learning_rate": 1e-06,
      "loss": -0.0619,
      "num_tokens": 600626696.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.17453886568546295,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 1083
    },
    {
      "clip_ratio/high_max": 0.001777457473508548,
      "clip_ratio/high_mean": 0.0006237813104235101,
      "clip_ratio/low_mean": 0.00034005245925072813,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000963833752393839,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2444.0,
      "completions/mean_length": 1014.5938110351562,
      "completions/mean_terminated_length": 560.8655395507812,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 10.130612244897959,
      "grad_norm": 0.3062381446361542,
      "learning_rate": 1e-06,
      "loss": -0.0414,
      "num_tokens": 601159172.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.16194264590740204,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 1084
    },
    {
      "clip_ratio/high_max": 0.0016755195738369366,
      "clip_ratio/high_mean": 0.0005690128355126944,
      "clip_ratio/low_mean": 0.00031835315428452304,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008873659971868619,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3889.0,
      "completions/mean_length": 938.6685791015625,
      "completions/mean_terminated_length": 564.2034912109375,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 10.139941690962099,
      "grad_norm": 0.21554887294769287,
      "learning_rate": 1e-06,
      "loss": -0.0395,
      "num_tokens": 601694747.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.11678526550531387,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111123085022,
      "step": 1085
    },
    {
      "clip_ratio/high_max": 0.001962107911822386,
      "clip_ratio/high_mean": 0.0006806953333580168,
      "clip_ratio/low_mean": 0.00027986448321826174,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009605598188500153,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3661.0,
      "completions/mean_length": 845.0859985351562,
      "completions/mean_terminated_length": 491.0259704589844,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 10.14927113702624,
      "grad_norm": 0.25139451026916504,
      "learning_rate": 1e-06,
      "loss": -0.0555,
      "num_tokens": 602175048.0,
      "reward": 0.7377232313156128,
      "reward_std": 0.11832760274410248,
      "rewards/verify_math_reward/mean": 0.7377232313156128,
      "rewards/verify_math_reward/std": 0.4401180148124695,
      "step": 1086
    },
    {
      "clip_ratio/high_max": 0.0020427578729140805,
      "clip_ratio/high_mean": 0.000650840588605206,
      "clip_ratio/low_mean": 0.00035162167387170484,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010024622570199426,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3064.0,
      "completions/mean_length": 901.12060546875,
      "completions/mean_terminated_length": 526.6583862304688,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 10.15860058309038,
      "grad_norm": 0.25762447714805603,
      "learning_rate": 1e-06,
      "loss": -0.0455,
      "num_tokens": 602692516.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.10096701979637146,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 1087
    },
    {
      "clip_ratio/high_max": 0.0014616701155318879,
      "clip_ratio/high_mean": 0.0004872079198321444,
      "clip_ratio/low_mean": 0.0003279360041688051,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008151439233188285,
      "completions/clipped_ratio": 0.1473214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3851.0,
      "completions/mean_length": 1087.3695068359375,
      "completions/mean_terminated_length": 567.5536499023438,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 10.167930029154519,
      "grad_norm": 0.25292402505874634,
      "learning_rate": 1e-06,
      "loss": -0.0597,
      "num_tokens": 603211151.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.12399842590093613,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 1088
    },
    {
      "clip_ratio/high_max": 0.001685253420873778,
      "clip_ratio/high_mean": 0.000670446495860233,
      "clip_ratio/low_mean": 0.0003939537546102656,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010644002595654456,
      "completions/clipped_ratio": 0.1506696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3662.0,
      "completions/mean_length": 1110.7210693359375,
      "completions/mean_terminated_length": 581.137939453125,
      "completions/min_length": 191.0,
      "completions/min_terminated_length": 191.0,
      "epoch": 10.177259475218658,
      "grad_norm": 0.2215045541524887,
      "learning_rate": 1e-06,
      "loss": -0.0229,
      "num_tokens": 603744957.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.11768680810928345,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975659370422363,
      "step": 1089
    },
    {
      "clip_ratio/high_max": 0.00181549544504378,
      "clip_ratio/high_mean": 0.0006686062297376338,
      "clip_ratio/low_mean": 0.00045631784269062337,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011249240524193738,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4071.0,
      "completions/mean_length": 790.1361694335938,
      "completions/mean_terminated_length": 492.5279846191406,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 10.186588921282798,
      "grad_norm": 0.32556310296058655,
      "learning_rate": 1e-06,
      "loss": -0.0486,
      "num_tokens": 604235239.0,
      "reward": 0.7209821939468384,
      "reward_std": 0.14537875354290009,
      "rewards/verify_math_reward/mean": 0.7209821343421936,
      "rewards/verify_math_reward/std": 0.448766827583313,
      "step": 1090
    },
    {
      "clip_ratio/high_max": 0.001611052437510807,
      "clip_ratio/high_mean": 0.0005533401954380679,
      "clip_ratio/low_mean": 0.00036057048328075325,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009139106950897258,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2410.0,
      "completions/mean_length": 767.6082763671875,
      "completions/mean_terminated_length": 494.2620849609375,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 10.19591836734694,
      "grad_norm": 0.27630820870399475,
      "learning_rate": 1e-06,
      "loss": -0.0482,
      "num_tokens": 604728640.0,
      "reward": 0.7087053656578064,
      "reward_std": 0.1345965713262558,
      "rewards/verify_math_reward/mean": 0.7087053656578064,
      "rewards/verify_math_reward/std": 0.45461276173591614,
      "step": 1091
    },
    {
      "clip_ratio/high_max": 0.001777386132744141,
      "clip_ratio/high_mean": 0.0006654456246906193,
      "clip_ratio/low_mean": 0.0003801905838827224,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001045636188791832,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3302.0,
      "completions/mean_length": 1055.607177734375,
      "completions/mean_terminated_length": 553.4876708984375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 10.205247813411079,
      "grad_norm": 0.26237422227859497,
      "learning_rate": 1e-06,
      "loss": -0.0309,
      "num_tokens": 605246168.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.1293707937002182,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 1092
    },
    {
      "clip_ratio/high_max": 0.0017463438598497305,
      "clip_ratio/high_mean": 0.0005789542519778479,
      "clip_ratio/low_mean": 0.0003651085903584317,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009440628382435534,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3867.0,
      "completions/mean_length": 997.7902221679688,
      "completions/mean_terminated_length": 550.6666259765625,
      "completions/min_length": 185.0,
      "completions/min_terminated_length": 185.0,
      "epoch": 10.214577259475218,
      "grad_norm": 0.6341986060142517,
      "learning_rate": 1e-06,
      "loss": -0.0337,
      "num_tokens": 605760532.0,
      "reward": 0.625,
      "reward_std": 0.12325026839971542,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1093
    },
    {
      "clip_ratio/high_max": 0.0017340375889034476,
      "clip_ratio/high_mean": 0.000649502487249265,
      "clip_ratio/low_mean": 0.00038692057205480523,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010364230583945755,
      "completions/clipped_ratio": 0.1439732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2260.0,
      "completions/mean_length": 1056.0457763671875,
      "completions/mean_terminated_length": 544.7626953125,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 10.223906705539358,
      "grad_norm": 0.4272231161594391,
      "learning_rate": 1e-06,
      "loss": -0.0425,
      "num_tokens": 606274949.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.16096946597099304,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 1094
    },
    {
      "clip_ratio/high_max": 0.002043367341684643,
      "clip_ratio/high_mean": 0.0007410595953842858,
      "clip_ratio/low_mean": 0.0004182979655524832,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011593575509323273,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2527.0,
      "completions/mean_length": 938.2645263671875,
      "completions/mean_terminated_length": 532.61083984375,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 10.2332361516035,
      "grad_norm": 0.22444264590740204,
      "learning_rate": 1e-06,
      "loss": -0.0266,
      "num_tokens": 606781818.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.14793068170547485,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 1095
    },
    {
      "clip_ratio/high_max": 0.001567937772051664,
      "clip_ratio/high_mean": 0.0005112565959279891,
      "clip_ratio/low_mean": 0.0002891954236474703,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008004520204849541,
      "completions/clipped_ratio": 0.1439732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3562.0,
      "completions/mean_length": 1068.3638916015625,
      "completions/mean_terminated_length": 559.1525268554688,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 10.242565597667639,
      "grad_norm": 0.22511501610279083,
      "learning_rate": 1e-06,
      "loss": -0.0795,
      "num_tokens": 607301448.0,
      "reward": 0.65625,
      "reward_std": 0.12707918882369995,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 1096
    },
    {
      "clip_ratio/high_max": 0.0018530640354583738,
      "clip_ratio/high_mean": 0.0007109063044481445,
      "clip_ratio/low_mean": 0.00029942098353785696,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001010327274343581,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1970.0,
      "completions/mean_length": 876.0960083007812,
      "completions/mean_terminated_length": 498.7007751464844,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 10.251895043731778,
      "grad_norm": 0.2433689534664154,
      "learning_rate": 1e-06,
      "loss": -0.0549,
      "num_tokens": 607787886.0,
      "reward": 0.691964328289032,
      "reward_std": 0.12204564362764359,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 1097
    },
    {
      "clip_ratio/high_max": 0.001672958787821699,
      "clip_ratio/high_mean": 0.0005305882577886223,
      "clip_ratio/low_mean": 0.000292459055344807,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008230473013099981,
      "completions/clipped_ratio": 0.1450892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2153.0,
      "completions/mean_length": 1066.1351318359375,
      "completions/mean_terminated_length": 551.92822265625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 10.261224489795918,
      "grad_norm": 0.21952423453330994,
      "learning_rate": 1e-06,
      "loss": -0.0882,
      "num_tokens": 608296831.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.12486424297094345,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219157218933105,
      "step": 1098
    },
    {
      "clip_ratio/high_max": 0.0017506926433270564,
      "clip_ratio/high_mean": 0.0005558587704399542,
      "clip_ratio/low_mean": 0.00022451321137850755,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007803719736330095,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2918.0,
      "completions/mean_length": 973.5123291015625,
      "completions/mean_terminated_length": 509.142333984375,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 10.270553935860057,
      "grad_norm": 0.22734098136425018,
      "learning_rate": 1e-06,
      "loss": -0.0579,
      "num_tokens": 608787282.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.13109326362609863,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1099
    },
    {
      "clip_ratio/high_max": 0.0020766484158230014,
      "clip_ratio/high_mean": 0.0008685218981554499,
      "clip_ratio/low_mean": 0.0005024680372116563,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013709899249079172,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3960.0,
      "completions/mean_length": 1012.6574096679688,
      "completions/mean_terminated_length": 554.1090087890625,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 10.279883381924199,
      "grad_norm": 0.32426655292510986,
      "learning_rate": 1e-06,
      "loss": -0.0795,
      "num_tokens": 609320943.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.17569930851459503,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 1100
    },
    {
      "clip_ratio/high_max": 0.0015093450565473177,
      "clip_ratio/high_mean": 0.0005851605619682232,
      "clip_ratio/low_mean": 0.00048782230987853836,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001072982864570804,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3776.0,
      "completions/mean_length": 854.3370971679688,
      "completions/mean_terminated_length": 532.1594848632812,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 10.289212827988338,
      "grad_norm": 0.2743111848831177,
      "learning_rate": 1e-06,
      "loss": -0.0262,
      "num_tokens": 609844589.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.12869539856910706,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 1101
    },
    {
      "clip_ratio/high_max": 0.0015608931244059931,
      "clip_ratio/high_mean": 0.0006012780695527908,
      "clip_ratio/low_mean": 0.0002929804020368465,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008942584863689262,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2797.0,
      "completions/mean_length": 967.1551513671875,
      "completions/mean_terminated_length": 542.8377685546875,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 10.298542274052478,
      "grad_norm": 0.28705301880836487,
      "learning_rate": 1e-06,
      "loss": -0.0553,
      "num_tokens": 610365536.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.12377132475376129,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 1102
    },
    {
      "clip_ratio/high_max": 0.0018560715179773979,
      "clip_ratio/high_mean": 0.0007170960343501065,
      "clip_ratio/low_mean": 0.0004387990820760024,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011558951082406566,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3475.0,
      "completions/mean_length": 1000.857177734375,
      "completions/mean_terminated_length": 540.5538330078125,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 10.307871720116617,
      "grad_norm": 0.7247589230537415,
      "learning_rate": 1e-06,
      "loss": -0.0638,
      "num_tokens": 610880184.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.15488240122795105,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 1103
    },
    {
      "clip_ratio/high_max": 0.0018103200854966417,
      "clip_ratio/high_mean": 0.0005872240326425526,
      "clip_ratio/low_mean": 0.0003009614656548365,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000888185486473958,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2950.0,
      "completions/mean_length": 1025.8070068359375,
      "completions/mean_terminated_length": 573.7298583984375,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 10.317201166180759,
      "grad_norm": 0.27747467160224915,
      "learning_rate": 1e-06,
      "loss": -0.0454,
      "num_tokens": 611412611.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.11817465722560883,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 1104
    },
    {
      "clip_ratio/high_max": 0.0020701095745607745,
      "clip_ratio/high_mean": 0.0007507752070523566,
      "clip_ratio/low_mean": 0.0003860654078380321,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011368406121619046,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3113.0,
      "completions/mean_length": 925.513427734375,
      "completions/mean_terminated_length": 553.9102172851562,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 10.326530612244898,
      "grad_norm": 0.33168232440948486,
      "learning_rate": 1e-06,
      "loss": -0.0583,
      "num_tokens": 611941535.0,
      "reward": 0.65625,
      "reward_std": 0.16686922311782837,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 1105
    },
    {
      "clip_ratio/high_max": 0.0017028767106239684,
      "clip_ratio/high_mean": 0.0007002319762250409,
      "clip_ratio/low_mean": 0.000340368744218722,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010406007277197205,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3763.0,
      "completions/mean_length": 980.3839721679688,
      "completions/mean_terminated_length": 553.3705444335938,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 10.335860058309038,
      "grad_norm": 0.18526038527488708,
      "learning_rate": 1e-06,
      "loss": -0.0572,
      "num_tokens": 612462367.0,
      "reward": 0.625,
      "reward_std": 0.14011907577514648,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1106
    },
    {
      "clip_ratio/high_max": 0.0014553765322489198,
      "clip_ratio/high_mean": 0.0005131906091264682,
      "clip_ratio/low_mean": 0.00034756784725686884,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000860758464114042,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3041.0,
      "completions/mean_length": 748.4185791015625,
      "completions/mean_terminated_length": 495.2400817871094,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 10.345189504373177,
      "grad_norm": 0.3814004957675934,
      "learning_rate": 1e-06,
      "loss": -0.0353,
      "num_tokens": 612961486.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.11032137274742126,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 1107
    },
    {
      "clip_ratio/high_max": 0.001951545764313778,
      "clip_ratio/high_mean": 0.0006905036000262044,
      "clip_ratio/low_mean": 0.0004273316717444686,
      "clip_ratio/low_min": 1.80792594619561e-05,
      "clip_ratio/region_mean": 0.0011178352779097622,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2128.0,
      "completions/mean_length": 947.2701416015625,
      "completions/mean_terminated_length": 506.608154296875,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 10.354518950437317,
      "grad_norm": 3.8275399208068848,
      "learning_rate": 1e-06,
      "loss": -0.0547,
      "num_tokens": 613443592.0,
      "reward": 0.652901828289032,
      "reward_std": 0.1478540003299713,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631320357322693,
      "step": 1108
    },
    {
      "clip_ratio/high_max": 0.001640619975660229,
      "clip_ratio/high_mean": 0.0005905162179260515,
      "clip_ratio/low_mean": 0.00019823863942747266,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007887548681537737,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2430.0,
      "completions/mean_length": 833.5670166015625,
      "completions/mean_terminated_length": 500.5018310546875,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 10.363848396501458,
      "grad_norm": 0.47877609729766846,
      "learning_rate": 1e-06,
      "loss": -0.0441,
      "num_tokens": 613937892.0,
      "reward": 0.7299107313156128,
      "reward_std": 0.09724828600883484,
      "rewards/verify_math_reward/mean": 0.7299107313156128,
      "rewards/verify_math_reward/std": 0.44425368309020996,
      "step": 1109
    },
    {
      "clip_ratio/high_max": 0.0015125883110158611,
      "clip_ratio/high_mean": 0.0005202195807214594,
      "clip_ratio/low_mean": 0.0003805459919021814,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009007655899040401,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4002.0,
      "completions/mean_length": 1018.919677734375,
      "completions/mean_terminated_length": 547.6550903320312,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 10.373177842565598,
      "grad_norm": 0.2592847943305969,
      "learning_rate": 1e-06,
      "loss": -0.0306,
      "num_tokens": 614448516.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.12711350619792938,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644601345062,
      "step": 1110
    },
    {
      "clip_ratio/high_max": 0.0018024676373897819,
      "clip_ratio/high_mean": 0.0006374157117079449,
      "clip_ratio/low_mean": 0.00029162323539821955,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009290389598390902,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4093.0,
      "completions/mean_length": 1014.2031860351562,
      "completions/mean_terminated_length": 537.6365966796875,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 10.382507288629737,
      "grad_norm": 0.2130023092031479,
      "learning_rate": 1e-06,
      "loss": -0.0576,
      "num_tokens": 614939474.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.12125540524721146,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 1111
    },
    {
      "clip_ratio/high_max": 0.0017755914850567933,
      "clip_ratio/high_mean": 0.0006347213902699878,
      "clip_ratio/low_mean": 0.00036021375035488745,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000994935144262854,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3961.0,
      "completions/mean_length": 930.060302734375,
      "completions/mean_terminated_length": 523.3526000976562,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 10.391836734693877,
      "grad_norm": 0.2003924697637558,
      "learning_rate": 1e-06,
      "loss": -0.0563,
      "num_tokens": 615440536.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.12978127598762512,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 1112
    },
    {
      "clip_ratio/high_max": 0.002548636020947015,
      "clip_ratio/high_mean": 0.000742576129596273,
      "clip_ratio/low_mean": 0.0003879735222653835,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011305496627755929,
      "completions/clipped_ratio": 0.1618303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2619.0,
      "completions/mean_length": 1151.227783203125,
      "completions/mean_terminated_length": 582.6630859375,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 10.401166180758018,
      "grad_norm": 0.23142004013061523,
      "learning_rate": 1e-06,
      "loss": -0.0584,
      "num_tokens": 615977868.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.12005076557397842,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 1113
    },
    {
      "clip_ratio/high_max": 0.0020881131422356702,
      "clip_ratio/high_mean": 0.0007140845464164158,
      "clip_ratio/low_mean": 0.00042816682525881333,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011422513744037133,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2029.0,
      "completions/mean_length": 878.8761596679688,
      "completions/mean_terminated_length": 506.2826843261719,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 10.410495626822158,
      "grad_norm": 0.25440171360969543,
      "learning_rate": 1e-06,
      "loss": -0.08,
      "num_tokens": 616478781.0,
      "reward": 0.691964328289032,
      "reward_std": 0.15477293729782104,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 1114
    },
    {
      "clip_ratio/high_max": 0.0019092388392891735,
      "clip_ratio/high_mean": 0.0006588499018107541,
      "clip_ratio/low_mean": 0.0002634126562952588,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009222625667462125,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3362.0,
      "completions/mean_length": 983.9922485351562,
      "completions/mean_terminated_length": 566.431640625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 10.419825072886297,
      "grad_norm": 1.254032015800476,
      "learning_rate": 1e-06,
      "loss": -0.0607,
      "num_tokens": 617005558.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.13696163892745972,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1115
    },
    {
      "clip_ratio/high_max": 0.0014197824675648008,
      "clip_ratio/high_mean": 0.0005624952509606373,
      "clip_ratio/low_mean": 0.0003861075886106846,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009486028357059695,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2565.0,
      "completions/mean_length": 1041.786865234375,
      "completions/mean_terminated_length": 542.0064697265625,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 10.429154518950437,
      "grad_norm": 0.27578651905059814,
      "learning_rate": 1e-06,
      "loss": -0.0518,
      "num_tokens": 617515159.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.13436834514141083,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 1116
    },
    {
      "clip_ratio/high_max": 0.0018176795165345538,
      "clip_ratio/high_mean": 0.0006974294037718209,
      "clip_ratio/low_mean": 0.0004957339324391796,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011931633416679688,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3993.0,
      "completions/mean_length": 860.3995971679688,
      "completions/mean_terminated_length": 543.183837890625,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 10.438483965014576,
      "grad_norm": 12.257213592529297,
      "learning_rate": 1e-06,
      "loss": -0.0364,
      "num_tokens": 618038285.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.15059886872768402,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 1117
    },
    {
      "clip_ratio/high_max": 0.0021207968675298616,
      "clip_ratio/high_mean": 0.0008221366988436785,
      "clip_ratio/low_mean": 0.0002894268523050414,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001111563527956605,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2600.0,
      "completions/mean_length": 1019.6563110351562,
      "completions/mean_terminated_length": 534.7545166015625,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 10.447813411078718,
      "grad_norm": 0.24080021679401398,
      "learning_rate": 1e-06,
      "loss": -0.0816,
      "num_tokens": 618543457.0,
      "reward": 0.645089328289032,
      "reward_std": 0.14800554513931274,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 1118
    },
    {
      "clip_ratio/high_max": 0.0016128017959999852,
      "clip_ratio/high_mean": 0.00048695453187974636,
      "clip_ratio/low_mean": 0.0004715990726253949,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009585536354279611,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1929.0,
      "completions/mean_length": 1042.7734375,
      "completions/mean_terminated_length": 552.3587646484375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 10.457142857142857,
      "grad_norm": 0.4047533869743347,
      "learning_rate": 1e-06,
      "loss": -0.0117,
      "num_tokens": 619063030.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.11321737617254257,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.4907552897930145,
      "step": 1119
    },
    {
      "clip_ratio/high_max": 0.0019372772803762928,
      "clip_ratio/high_mean": 0.0008348385472345399,
      "clip_ratio/low_mean": 0.0002843746135567926,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011192131423740648,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2557.0,
      "completions/mean_length": 1025.466552734375,
      "completions/mean_terminated_length": 518.3693237304688,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 10.466472303206997,
      "grad_norm": 0.33380305767059326,
      "learning_rate": 1e-06,
      "loss": -0.0881,
      "num_tokens": 619553032.0,
      "reward": 0.6640625,
      "reward_std": 0.16371390223503113,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 1120
    },
    {
      "clip_ratio/high_max": 0.0017307962280028732,
      "clip_ratio/high_mean": 0.000534700634489127,
      "clip_ratio/low_mean": 0.0005774096780442051,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011121103161713108,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3031.0,
      "completions/mean_length": 1008.3438110351562,
      "completions/mean_terminated_length": 549.1538696289062,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 10.475801749271136,
      "grad_norm": 1.0805732011795044,
      "learning_rate": 1e-06,
      "loss": -0.0313,
      "num_tokens": 620072348.0,
      "reward": 0.606026828289032,
      "reward_std": 0.1387321949005127,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 1121
    },
    {
      "clip_ratio/high_max": 0.0016814146165415877,
      "clip_ratio/high_mean": 0.0006143256759969518,
      "clip_ratio/low_mean": 0.0004442820040821971,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010586076587060234,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1927.0,
      "completions/mean_length": 881.3426513671875,
      "completions/mean_terminated_length": 495.583740234375,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 10.485131195335278,
      "grad_norm": 1.1055032014846802,
      "learning_rate": 1e-06,
      "loss": -0.0291,
      "num_tokens": 620549447.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.12392497807741165,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 1122
    },
    {
      "clip_ratio/high_max": 0.0016898810972634237,
      "clip_ratio/high_mean": 0.0006642912476308993,
      "clip_ratio/low_mean": 0.00029380264049905236,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009580938731232891,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3983.0,
      "completions/mean_length": 935.8672485351562,
      "completions/mean_terminated_length": 547.7807006835938,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 10.494460641399417,
      "grad_norm": 0.21529246866703033,
      "learning_rate": 1e-06,
      "loss": -0.0423,
      "num_tokens": 621070672.0,
      "reward": 0.645089328289032,
      "reward_std": 0.13260099291801453,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 1123
    },
    {
      "clip_ratio/high_max": 0.0013960718279122375,
      "clip_ratio/high_mean": 0.000468820533114922,
      "clip_ratio/low_mean": 0.00036943050645277253,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008382510241062846,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3974.0,
      "completions/mean_length": 1006.9620971679688,
      "completions/mean_terminated_length": 520.0594482421875,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 10.503790087463557,
      "grad_norm": 0.2957065999507904,
      "learning_rate": 1e-06,
      "loss": -0.0126,
      "num_tokens": 621559102.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.1060783639550209,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 1124
    },
    {
      "clip_ratio/high_max": 0.0018658112785487901,
      "clip_ratio/high_mean": 0.0006119315316936991,
      "clip_ratio/low_mean": 0.00037690872568418854,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009888402819342446,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2599.0,
      "completions/mean_length": 899.810302734375,
      "completions/mean_terminated_length": 484.6683349609375,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 10.513119533527696,
      "grad_norm": 0.30778059363365173,
      "learning_rate": 1e-06,
      "loss": -0.057,
      "num_tokens": 622023668.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.11937858164310455,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 1125
    },
    {
      "clip_ratio/high_max": 0.0014257887396524893,
      "clip_ratio/high_mean": 0.00043771538548753597,
      "clip_ratio/low_mean": 0.00021784382488476695,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006555592099175556,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2198.0,
      "completions/mean_length": 919.7980346679688,
      "completions/mean_terminated_length": 484.48095703125,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 10.522448979591836,
      "grad_norm": 0.21127763390541077,
      "learning_rate": 1e-06,
      "loss": -0.0179,
      "num_tokens": 622511223.0,
      "reward": 0.684151828289032,
      "reward_std": 0.10167493671178818,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 1126
    },
    {
      "clip_ratio/high_max": 0.0018633555264386814,
      "clip_ratio/high_mean": 0.0006138461822047248,
      "clip_ratio/low_mean": 0.0003756702421924274,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009895164184854366,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1989.0,
      "completions/mean_length": 948.1585083007812,
      "completions/mean_terminated_length": 493.8722839355469,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 10.531778425655977,
      "grad_norm": 0.8563103079795837,
      "learning_rate": 1e-06,
      "loss": -0.0495,
      "num_tokens": 622989765.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.11843496561050415,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 1127
    },
    {
      "clip_ratio/high_max": 0.0018796679833030794,
      "clip_ratio/high_mean": 0.0007108889294613618,
      "clip_ratio/low_mean": 0.00045899574706709245,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011698846719809808,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3387.0,
      "completions/mean_length": 919.7734985351562,
      "completions/mean_terminated_length": 493.59619140625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 10.541107871720117,
      "grad_norm": 0.2738456130027771,
      "learning_rate": 1e-06,
      "loss": -0.0508,
      "num_tokens": 623481194.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.1384280025959015,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 1128
    },
    {
      "clip_ratio/high_max": 0.00207661838066997,
      "clip_ratio/high_mean": 0.0006093209012760781,
      "clip_ratio/low_mean": 0.0004467806620596093,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010561015842540655,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3407.0,
      "completions/mean_length": 917.294677734375,
      "completions/mean_terminated_length": 495.3426208496094,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 10.550437317784256,
      "grad_norm": 0.21665704250335693,
      "learning_rate": 1e-06,
      "loss": -0.0468,
      "num_tokens": 623961794.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.12328347563743591,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 1129
    },
    {
      "clip_ratio/high_max": 0.001891064501251094,
      "clip_ratio/high_mean": 0.0007400507220154395,
      "clip_ratio/low_mean": 0.0003224086344744137,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010624593396642013,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3923.0,
      "completions/mean_length": 892.1250610351562,
      "completions/mean_terminated_length": 547.5797119140625,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 10.559766763848396,
      "grad_norm": 0.3111117482185364,
      "learning_rate": 1e-06,
      "loss": -0.0549,
      "num_tokens": 624491698.0,
      "reward": 0.7120535969734192,
      "reward_std": 0.13665854930877686,
      "rewards/verify_math_reward/mean": 0.7120535969734192,
      "rewards/verify_math_reward/std": 0.4530589282512665,
      "step": 1130
    },
    {
      "clip_ratio/high_max": 0.0017117916722781956,
      "clip_ratio/high_mean": 0.0006911886903253617,
      "clip_ratio/low_mean": 0.00034528584274085006,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010364745103288442,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1861.0,
      "completions/mean_length": 815.4642944335938,
      "completions/mean_terminated_length": 493.8431396484375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 10.569096209912537,
      "grad_norm": 0.38041985034942627,
      "learning_rate": 1e-06,
      "loss": -0.0459,
      "num_tokens": 624975138.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.12974987924098969,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 1131
    },
    {
      "clip_ratio/high_max": 0.002079599682474509,
      "clip_ratio/high_mean": 0.000777964811277343,
      "clip_ratio/low_mean": 0.0005423197117124801,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013202844784245826,
      "completions/clipped_ratio": 0.1741071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3252.0,
      "completions/mean_length": 1179.4320068359375,
      "completions/mean_terminated_length": 564.5878295898438,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 10.578425655976677,
      "grad_norm": 0.23912523686885834,
      "learning_rate": 1e-06,
      "loss": -0.1093,
      "num_tokens": 625486781.0,
      "reward": 0.598214328289032,
      "reward_std": 0.17626453936100006,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 1132
    },
    {
      "clip_ratio/high_max": 0.0020934834028594196,
      "clip_ratio/high_mean": 0.0007495282061427133,
      "clip_ratio/low_mean": 0.0004241990473019541,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011737272616301198,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4081.0,
      "completions/mean_length": 943.2902221679688,
      "completions/mean_terminated_length": 542.7572021484375,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 10.587755102040816,
      "grad_norm": 0.2606118321418762,
      "learning_rate": 1e-06,
      "loss": -0.0624,
      "num_tokens": 626017193.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.14613012969493866,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 1133
    },
    {
      "clip_ratio/high_max": 0.0017134799454652239,
      "clip_ratio/high_mean": 0.0006363854117807932,
      "clip_ratio/low_mean": 0.0005908216662646737,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012272070634935517,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2954.0,
      "completions/mean_length": 952.458740234375,
      "completions/mean_terminated_length": 535.1744995117188,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 10.597084548104956,
      "grad_norm": 0.26522207260131836,
      "learning_rate": 1e-06,
      "loss": -0.0445,
      "num_tokens": 626530348.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.14245164394378662,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 1134
    },
    {
      "clip_ratio/high_max": 0.002229847199487267,
      "clip_ratio/high_mean": 0.0007801658339303685,
      "clip_ratio/low_mean": 0.0003450735118804005,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011252393596805632,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3440.0,
      "completions/mean_length": 926.1785888671875,
      "completions/mean_terminated_length": 509.93939208984375,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 10.606413994169095,
      "grad_norm": 0.4457687735557556,
      "learning_rate": 1e-06,
      "loss": -0.0384,
      "num_tokens": 627019428.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.1283590942621231,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 1135
    },
    {
      "clip_ratio/high_max": 0.0015379057549580466,
      "clip_ratio/high_mean": 0.0005165713519090787,
      "clip_ratio/low_mean": 0.00024278755722662027,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007593589161842829,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3915.0,
      "completions/mean_length": 996.0379638671875,
      "completions/mean_terminated_length": 516.662353515625,
      "completions/min_length": 79.0,
      "completions/min_terminated_length": 79.0,
      "epoch": 10.615743440233237,
      "grad_norm": 0.22791990637779236,
      "learning_rate": 1e-06,
      "loss": -0.0556,
      "num_tokens": 627508086.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.10994119197130203,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 1136
    },
    {
      "clip_ratio/high_max": 0.001585584872373147,
      "clip_ratio/high_mean": 0.0004915774989058264,
      "clip_ratio/low_mean": 0.00035343909712537425,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008450166023976635,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2700.0,
      "completions/mean_length": 968.2969360351562,
      "completions/mean_terminated_length": 503.15130615234375,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 10.625072886297376,
      "grad_norm": 0.23939189314842224,
      "learning_rate": 1e-06,
      "loss": -0.0592,
      "num_tokens": 627997344.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.12399844080209732,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 1137
    },
    {
      "clip_ratio/high_max": 0.001792915580153931,
      "clip_ratio/high_mean": 0.0005436054998426698,
      "clip_ratio/low_mean": 0.0002602719532660558,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008038774612941779,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3635.0,
      "completions/mean_length": 912.0770263671875,
      "completions/mean_terminated_length": 516.5834350585938,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 10.634402332361516,
      "grad_norm": 0.2562861442565918,
      "learning_rate": 1e-06,
      "loss": -0.0208,
      "num_tokens": 628500997.0,
      "reward": 0.7008928656578064,
      "reward_std": 0.10750053822994232,
      "rewards/verify_math_reward/mean": 0.7008928656578064,
      "rewards/verify_math_reward/std": 0.458122581243515,
      "step": 1138
    },
    {
      "clip_ratio/high_max": 0.0023553940845886245,
      "clip_ratio/high_mean": 0.0008548100431653438,
      "clip_ratio/low_mean": 0.0003927026818928425,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012475127550715115,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1756.0,
      "completions/mean_length": 839.8638916015625,
      "completions/mean_terminated_length": 507.4415588378906,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 10.643731778425655,
      "grad_norm": 0.3247152864933014,
      "learning_rate": 1e-06,
      "loss": -0.0415,
      "num_tokens": 628990683.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.14902332425117493,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 1139
    },
    {
      "clip_ratio/high_max": 0.0014639936889579985,
      "clip_ratio/high_mean": 0.0005834889207108063,
      "clip_ratio/low_mean": 0.0005985020097796223,
      "clip_ratio/low_min": 2.5578065105946735e-05,
      "clip_ratio/region_mean": 0.0011819909159385134,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4078.0,
      "completions/mean_length": 987.2277221679688,
      "completions/mean_terminated_length": 570.1012573242188,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 10.653061224489797,
      "grad_norm": 0.26748228073120117,
      "learning_rate": 1e-06,
      "loss": -0.022,
      "num_tokens": 629539639.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.14725808799266815,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321892857551575,
      "step": 1140
    },
    {
      "clip_ratio/high_max": 0.00182073871474131,
      "clip_ratio/high_mean": 0.0006503117965621641,
      "clip_ratio/low_mean": 0.0003231200644222554,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009734318555274513,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2689.0,
      "completions/mean_length": 1005.7422485351562,
      "completions/mean_terminated_length": 546.1654052734375,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 10.662390670553936,
      "grad_norm": 0.3622390329837799,
      "learning_rate": 1e-06,
      "loss": -0.0625,
      "num_tokens": 630049584.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.14733223617076874,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 1141
    },
    {
      "clip_ratio/high_max": 0.0017305209694313817,
      "clip_ratio/high_mean": 0.0006389486898115138,
      "clip_ratio/low_mean": 0.00033793572038121056,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000976884417468682,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3463.0,
      "completions/mean_length": 956.1395263671875,
      "completions/mean_terminated_length": 548.3140258789062,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 10.671720116618076,
      "grad_norm": 0.20694097876548767,
      "learning_rate": 1e-06,
      "loss": -0.0548,
      "num_tokens": 630578989.0,
      "reward": 0.707589328289032,
      "reward_std": 0.12249323725700378,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 1142
    },
    {
      "clip_ratio/high_max": 0.0018354836392973084,
      "clip_ratio/high_mean": 0.0006199017298058607,
      "clip_ratio/low_mean": 0.0003048370072065154,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009247387133655138,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3703.0,
      "completions/mean_length": 907.5435791015625,
      "completions/mean_terminated_length": 493.40606689453125,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 10.681049562682215,
      "grad_norm": 0.36741942167282104,
      "learning_rate": 1e-06,
      "loss": -0.0377,
      "num_tokens": 631069132.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.12140624225139618,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 1143
    },
    {
      "clip_ratio/high_max": 0.0018875606656365562,
      "clip_ratio/high_mean": 0.0007068097020237474,
      "clip_ratio/low_mean": 0.00036111112331127515,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00106792084807239,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2339.0,
      "completions/mean_length": 892.716552734375,
      "completions/mean_terminated_length": 521.7260131835938,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 10.690379008746355,
      "grad_norm": 0.2807365655899048,
      "learning_rate": 1e-06,
      "loss": -0.0443,
      "num_tokens": 631577734.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.13707111775875092,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 1144
    },
    {
      "clip_ratio/high_max": 0.0013653917631017976,
      "clip_ratio/high_mean": 0.000463729195871565,
      "clip_ratio/low_mean": 0.00046903318366275926,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009327623738499824,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2463.0,
      "completions/mean_length": 902.8672485351562,
      "completions/mean_terminated_length": 524.1560668945312,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 10.699708454810496,
      "grad_norm": 0.24377836287021637,
      "learning_rate": 1e-06,
      "loss": -0.034,
      "num_tokens": 632086639.0,
      "reward": 0.625,
      "reward_std": 0.13203826546669006,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1145
    },
    {
      "clip_ratio/high_max": 0.001926941087731393,
      "clip_ratio/high_mean": 0.0005894028254260775,
      "clip_ratio/low_mean": 0.0005284619023768755,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011178647091583116,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3093.0,
      "completions/mean_length": 774.7767944335938,
      "completions/mean_terminated_length": 523.591796875,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 10.709037900874636,
      "grad_norm": 0.23393908143043518,
      "learning_rate": 1e-06,
      "loss": -0.0268,
      "num_tokens": 632604487.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.12936869263648987,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 1146
    },
    {
      "clip_ratio/high_max": 0.0017625708496780135,
      "clip_ratio/high_mean": 0.0006240743450689479,
      "clip_ratio/low_mean": 0.0003535435023422906,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009776178321772022,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3627.0,
      "completions/mean_length": 903.35498046875,
      "completions/mean_terminated_length": 529.1546020507812,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 10.718367346938775,
      "grad_norm": 0.35867175459861755,
      "learning_rate": 1e-06,
      "loss": -0.0495,
      "num_tokens": 633118549.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.11945345252752304,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.4628615975379944,
      "step": 1147
    },
    {
      "clip_ratio/high_max": 0.0016776008433225797,
      "clip_ratio/high_mean": 0.0005710745863325428,
      "clip_ratio/low_mean": 0.000358833816335391,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009299084103986388,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2322.0,
      "completions/mean_length": 1028.234375,
      "completions/mean_terminated_length": 521.5942993164062,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 10.727696793002915,
      "grad_norm": 0.2209133505821228,
      "learning_rate": 1e-06,
      "loss": -0.0518,
      "num_tokens": 633605343.0,
      "reward": 0.640625,
      "reward_std": 0.1312875896692276,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 1148
    },
    {
      "clip_ratio/high_max": 0.0016966796101769432,
      "clip_ratio/high_mean": 0.0006046265898476122,
      "clip_ratio/low_mean": 0.000288975000330538,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008936015910876449,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3048.0,
      "completions/mean_length": 908.4063110351562,
      "completions/mean_terminated_length": 565.6118774414062,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 10.737026239067056,
      "grad_norm": 0.2552914619445801,
      "learning_rate": 1e-06,
      "loss": -0.054,
      "num_tokens": 634148451.0,
      "reward": 0.6796875596046448,
      "reward_std": 0.13193020224571228,
      "rewards/verify_math_reward/mean": 0.6796875,
      "rewards/verify_math_reward/std": 0.4668572247028351,
      "step": 1149
    },
    {
      "clip_ratio/high_max": 0.0018121568609785754,
      "clip_ratio/high_mean": 0.0006204616511240602,
      "clip_ratio/low_mean": 0.00036168388578516897,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000982145549642155,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3298.0,
      "completions/mean_length": 921.3381958007812,
      "completions/mean_terminated_length": 540.3787231445312,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 10.746355685131196,
      "grad_norm": 0.28461557626724243,
      "learning_rate": 1e-06,
      "loss": -0.0429,
      "num_tokens": 634672034.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.15477433800697327,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 1150
    },
    {
      "clip_ratio/high_max": 0.0015115136702661403,
      "clip_ratio/high_mean": 0.00047510640524706105,
      "clip_ratio/low_mean": 0.0003698664345392899,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008449728629784659,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3388.0,
      "completions/mean_length": 985.1295166015625,
      "completions/mean_terminated_length": 554.2719116210938,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 10.755685131195335,
      "grad_norm": 0.2832811176776886,
      "learning_rate": 1e-06,
      "loss": -0.0531,
      "num_tokens": 635198766.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.13583439588546753,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935930073261261,
      "step": 1151
    },
    {
      "clip_ratio/high_max": 0.0017652543720032554,
      "clip_ratio/high_mean": 0.0006981578808336053,
      "clip_ratio/low_mean": 0.00021140358376214863,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009095614623220172,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2570.0,
      "completions/mean_length": 1023.5714721679688,
      "completions/mean_terminated_length": 530.072509765625,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 10.765014577259475,
      "grad_norm": 0.2548813819885254,
      "learning_rate": 1e-06,
      "loss": -0.0865,
      "num_tokens": 635701902.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.1361374855041504,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975656390190125,
      "step": 1152
    },
    {
      "clip_ratio/high_max": 0.0015753203006170224,
      "clip_ratio/high_mean": 0.000633159390417859,
      "clip_ratio/low_mean": 0.0003180185126439028,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009511779135209508,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2231.0,
      "completions/mean_length": 973.185302734375,
      "completions/mean_terminated_length": 504.1617736816406,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 10.774344023323614,
      "grad_norm": 0.32098862528800964,
      "learning_rate": 1e-06,
      "loss": -0.078,
      "num_tokens": 636186060.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.1409093141555786,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 1153
    },
    {
      "clip_ratio/high_max": 0.0021992778820276726,
      "clip_ratio/high_mean": 0.0007671369257877814,
      "clip_ratio/low_mean": 0.0003616119411162799,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011287488487141673,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3448.0,
      "completions/mean_length": 890.9029541015625,
      "completions/mean_terminated_length": 528.5875854492188,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 10.783673469387756,
      "grad_norm": 0.22918209433555603,
      "learning_rate": 1e-06,
      "loss": -0.0489,
      "num_tokens": 636699973.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.146052747964859,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1154
    },
    {
      "clip_ratio/high_max": 0.001795498130377382,
      "clip_ratio/high_mean": 0.0006084727829147596,
      "clip_ratio/low_mean": 0.00030649165887552954,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009149644356511999,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3645.0,
      "completions/mean_length": 924.3605346679688,
      "completions/mean_terminated_length": 548.198486328125,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 10.793002915451895,
      "grad_norm": 0.2120915800333023,
      "learning_rate": 1e-06,
      "loss": -0.0388,
      "num_tokens": 637226384.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.12941217422485352,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 1155
    },
    {
      "clip_ratio/high_max": 0.0018260637007188052,
      "clip_ratio/high_mean": 0.0006302491547103273,
      "clip_ratio/low_mean": 0.000405974820296251,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010362239736423362,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3427.0,
      "completions/mean_length": 898.4107666015625,
      "completions/mean_terminated_length": 545.7645874023438,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 10.802332361516035,
      "grad_norm": 0.2945021688938141,
      "learning_rate": 1e-06,
      "loss": -0.0358,
      "num_tokens": 637757680.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.1316271275281906,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 1156
    },
    {
      "clip_ratio/high_max": 0.0019284265108581167,
      "clip_ratio/high_mean": 0.0007179477197496453,
      "clip_ratio/low_mean": 0.0004558523833111394,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011738000976038165,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2167.0,
      "completions/mean_length": 926.93310546875,
      "completions/mean_terminated_length": 524.322021484375,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 10.811661807580174,
      "grad_norm": 0.3774220049381256,
      "learning_rate": 1e-06,
      "loss": -0.0378,
      "num_tokens": 638273036.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.14763645827770233,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 1157
    },
    {
      "clip_ratio/high_max": 0.001877379123470746,
      "clip_ratio/high_mean": 0.0005780792616860708,
      "clip_ratio/low_mean": 0.00041741555151020293,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000995494829112431,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2687.0,
      "completions/mean_length": 941.0045166015625,
      "completions/mean_terminated_length": 513.1406860351562,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 10.820991253644316,
      "grad_norm": 0.25385749340057373,
      "learning_rate": 1e-06,
      "loss": -0.0305,
      "num_tokens": 638767688.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.12471451610326767,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667038440704346,
      "step": 1158
    },
    {
      "clip_ratio/high_max": 0.0018527543907111976,
      "clip_ratio/high_mean": 0.000647066875899327,
      "clip_ratio/low_mean": 0.00025293010685345507,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008999970013974234,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3651.0,
      "completions/mean_length": 799.7924194335938,
      "completions/mean_terminated_length": 489.892578125,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 10.830320699708455,
      "grad_norm": 0.6219267845153809,
      "learning_rate": 1e-06,
      "loss": -0.0614,
      "num_tokens": 639254062.0,
      "reward": 0.7299107313156128,
      "reward_std": 0.11960569024085999,
      "rewards/verify_math_reward/mean": 0.7299107313156128,
      "rewards/verify_math_reward/std": 0.44425368309020996,
      "step": 1159
    },
    {
      "clip_ratio/high_max": 0.0020307375816628337,
      "clip_ratio/high_mean": 0.0007411050537484698,
      "clip_ratio/low_mean": 0.00042177719205938047,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011628822358034085,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3510.0,
      "completions/mean_length": 996.0636596679688,
      "completions/mean_terminated_length": 535.0474243164062,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 10.839650145772595,
      "grad_norm": 0.2584342360496521,
      "learning_rate": 1e-06,
      "loss": -0.0712,
      "num_tokens": 639765207.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.16175970435142517,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 1160
    },
    {
      "clip_ratio/high_max": 0.0018753458425635472,
      "clip_ratio/high_mean": 0.0007418537570629269,
      "clip_ratio/low_mean": 0.0005814146361444728,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013232683886599261,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3906.0,
      "completions/mean_length": 835.4832763671875,
      "completions/mean_terminated_length": 515.8247680664062,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 10.848979591836734,
      "grad_norm": 0.31656092405319214,
      "learning_rate": 1e-06,
      "loss": -0.0306,
      "num_tokens": 640268168.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.16123723983764648,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.4628615975379944,
      "step": 1161
    },
    {
      "clip_ratio/high_max": 0.002378771241637878,
      "clip_ratio/high_mean": 0.0009034149879880715,
      "clip_ratio/low_mean": 0.0004387713147480099,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013421862604445778,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3957.0,
      "completions/mean_length": 907.7422485351562,
      "completions/mean_terminated_length": 547.3304443359375,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 10.858309037900874,
      "grad_norm": 1.2304679155349731,
      "learning_rate": 1e-06,
      "loss": -0.0561,
      "num_tokens": 640796393.0,
      "reward": 0.652901828289032,
      "reward_std": 0.1844968944787979,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631320357322693,
      "step": 1162
    },
    {
      "clip_ratio/high_max": 0.0016241311132034753,
      "clip_ratio/high_mean": 0.0005267533124424517,
      "clip_ratio/low_mean": 0.000373558068076818,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009003113955259323,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2979.0,
      "completions/mean_length": 840.7142944335938,
      "completions/mean_terminated_length": 521.5686645507812,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 10.867638483965015,
      "grad_norm": 0.3395094573497772,
      "learning_rate": 1e-06,
      "loss": -0.0268,
      "num_tokens": 641311673.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.13256637752056122,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.4646684527397156,
      "step": 1163
    },
    {
      "clip_ratio/high_max": 0.0019190324965165928,
      "clip_ratio/high_mean": 0.0006439916669478407,
      "clip_ratio/low_mean": 0.00031507297444477445,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009590646368451416,
      "completions/clipped_ratio": 0.1540178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3959.0,
      "completions/mean_length": 1078.12060546875,
      "completions/mean_terminated_length": 528.6912841796875,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 10.876967930029155,
      "grad_norm": 0.2265859842300415,
      "learning_rate": 1e-06,
      "loss": -0.0317,
      "num_tokens": 641794757.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.1022394672036171,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 1164
    },
    {
      "clip_ratio/high_max": 0.002210411668784218,
      "clip_ratio/high_mean": 0.0008060793652475695,
      "clip_ratio/low_mean": 0.0003716223181982059,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011777016770793125,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3364.0,
      "completions/mean_length": 972.458740234375,
      "completions/mean_terminated_length": 539.8462524414062,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 10.886297376093294,
      "grad_norm": 0.3146289885044098,
      "learning_rate": 1e-06,
      "loss": -0.0447,
      "num_tokens": 642304832.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.14304757118225098,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 1165
    },
    {
      "clip_ratio/high_max": 0.0018482201303413603,
      "clip_ratio/high_mean": 0.0006746981798642082,
      "clip_ratio/low_mean": 0.00021237814030428126,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008870763049344532,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3116.0,
      "completions/mean_length": 916.677490234375,
      "completions/mean_terminated_length": 535.1587524414062,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 10.895626822157434,
      "grad_norm": 0.19967442750930786,
      "learning_rate": 1e-06,
      "loss": -0.057,
      "num_tokens": 642821431.0,
      "reward": 0.7176339626312256,
      "reward_std": 0.11817535012960434,
      "rewards/verify_math_reward/mean": 0.7176339030265808,
      "rewards/verify_math_reward/std": 0.4504019320011139,
      "step": 1166
    },
    {
      "clip_ratio/high_max": 0.0018805424515448976,
      "clip_ratio/high_mean": 0.000654468773973349,
      "clip_ratio/low_mean": 0.0002834320416695846,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009379008206451545,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3328.0,
      "completions/mean_length": 910.1172485351562,
      "completions/mean_terminated_length": 541.1419677734375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 10.904956268221575,
      "grad_norm": 0.3868400752544403,
      "learning_rate": 1e-06,
      "loss": -0.0566,
      "num_tokens": 643343816.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.13218912482261658,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1167
    },
    {
      "clip_ratio/high_max": 0.0015009039525466505,
      "clip_ratio/high_mean": 0.0004454672989595565,
      "clip_ratio/low_mean": 0.00033791017085604835,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000783377481639036,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2416.0,
      "completions/mean_length": 933.04248046875,
      "completions/mean_terminated_length": 462.65386962890625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 10.914285714285715,
      "grad_norm": 0.26711606979370117,
      "learning_rate": 1e-06,
      "loss": -0.0589,
      "num_tokens": 643796438.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.09337730705738068,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613667368888855,
      "step": 1168
    },
    {
      "clip_ratio/high_max": 0.0021313845718395896,
      "clip_ratio/high_mean": 0.0008946762845880585,
      "clip_ratio/low_mean": 0.00042373732230771566,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001318413622357184,
      "completions/clipped_ratio": 0.15625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3807.0,
      "completions/mean_length": 1145.243408203125,
      "completions/mean_terminated_length": 598.806884765625,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 10.923615160349854,
      "grad_norm": 0.26545679569244385,
      "learning_rate": 1e-06,
      "loss": -0.0718,
      "num_tokens": 644335232.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.18103593587875366,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761127948761,
      "step": 1169
    },
    {
      "clip_ratio/high_max": 0.002021483662247192,
      "clip_ratio/high_mean": 0.0008393947937292978,
      "clip_ratio/low_mean": 0.00041340081406815443,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012527956278063357,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2759.0,
      "completions/mean_length": 851.1239013671875,
      "completions/mean_terminated_length": 502.1693420410156,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 10.932944606413994,
      "grad_norm": 0.27601340413093567,
      "learning_rate": 1e-06,
      "loss": -0.052,
      "num_tokens": 644832735.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.15665017068386078,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 1170
    },
    {
      "clip_ratio/high_max": 0.001975671486434294,
      "clip_ratio/high_mean": 0.0006402839853762998,
      "clip_ratio/low_mean": 0.00037960739700793056,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001019891383293725,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3253.0,
      "completions/mean_length": 976.9364013671875,
      "completions/mean_terminated_length": 540.4262084960938,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 10.942274052478133,
      "grad_norm": 0.2558325529098511,
      "learning_rate": 1e-06,
      "loss": -0.0266,
      "num_tokens": 645353934.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.12264476716518402,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 1171
    },
    {
      "clip_ratio/high_max": 0.0014388494892045856,
      "clip_ratio/high_mean": 0.0005496395879163174,
      "clip_ratio/low_mean": 0.0002644868168317771,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000814126407931326,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4066.0,
      "completions/mean_length": 1029.477783203125,
      "completions/mean_terminated_length": 604.7623901367188,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 10.951603498542275,
      "grad_norm": 0.24647484719753265,
      "learning_rate": 1e-06,
      "loss": -0.06,
      "num_tokens": 645920266.0,
      "reward": 0.590401828289032,
      "reward_std": 0.13016286492347717,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 1172
    },
    {
      "clip_ratio/high_max": 0.0018436602185829543,
      "clip_ratio/high_mean": 0.0006677040037175175,
      "clip_ratio/low_mean": 0.000404253919441544,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010719579295255244,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3685.0,
      "completions/mean_length": 1034.825927734375,
      "completions/mean_terminated_length": 628.475341796875,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 10.960932944606414,
      "grad_norm": 0.22879140079021454,
      "learning_rate": 1e-06,
      "loss": -0.05,
      "num_tokens": 646498918.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.1615314483642578,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 1173
    },
    {
      "clip_ratio/high_max": 0.0020206565677654,
      "clip_ratio/high_mean": 0.0006967060598981334,
      "clip_ratio/low_mean": 0.000314914309001324,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010116203666257206,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3733.0,
      "completions/mean_length": 929.1563110351562,
      "completions/mean_terminated_length": 579.90087890625,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 10.970262390670554,
      "grad_norm": 0.22495341300964355,
      "learning_rate": 1e-06,
      "loss": -0.0447,
      "num_tokens": 647052314.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.11986783146858215,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644899368286,
      "step": 1174
    },
    {
      "clip_ratio/high_max": 0.0020932027109665796,
      "clip_ratio/high_mean": 0.0006908383847985533,
      "clip_ratio/low_mean": 0.0004698266920968308,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011606650878093205,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3935.0,
      "completions/mean_length": 867.8939819335938,
      "completions/mean_terminated_length": 547.0637817382812,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 10.979591836734693,
      "grad_norm": 0.5894312858581543,
      "learning_rate": 1e-06,
      "loss": -0.039,
      "num_tokens": 647591171.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.14011836051940918,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331799030303955,
      "step": 1175
    },
    {
      "clip_ratio/high_max": 0.0019288870716991369,
      "clip_ratio/high_mean": 0.0007526243389293086,
      "clip_ratio/low_mean": 0.00043504604309418937,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011876703883899609,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3127.0,
      "completions/mean_length": 978.0859985351562,
      "completions/mean_terminated_length": 514.3961791992188,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 10.988921282798835,
      "grad_norm": 0.3286615312099457,
      "learning_rate": 1e-06,
      "loss": -0.0506,
      "num_tokens": 648082280.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.1680738478899002,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219160199165344,
      "step": 1176
    },
    {
      "clip_ratio/high_max": 0.0017645190782786813,
      "clip_ratio/high_mean": 0.0007083440468704794,
      "clip_ratio/low_mean": 0.0002699465910609433,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009782906345208175,
      "completions/clipped_ratio": 0.11079545454545459,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1755.0,
      "completions/mean_length": 975.0966186523438,
      "completions/mean_terminated_length": 586.2300415039062,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 10.998250728862974,
      "grad_norm": 0.228280171751976,
      "learning_rate": 1e-06,
      "loss": -0.0402,
      "num_tokens": 648615781.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.11532102525234222,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.48841196298599243,
      "step": 1177
    },
    {
      "clip_ratio/high_max": 0.0017149225714092609,
      "clip_ratio/high_mean": 0.0007062563181534642,
      "clip_ratio/low_mean": 0.0002461648487042112,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009524211800453486,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3793.0,
      "completions/mean_length": 1036.0670166015625,
      "completions/mean_terminated_length": 567.4285888671875,
      "completions/min_length": 188.0,
      "completions/min_terminated_length": 188.0,
      "epoch": 11.00932944606414,
      "grad_norm": 0.25313082337379456,
      "learning_rate": 1e-06,
      "loss": -0.0859,
      "num_tokens": 649150369.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.13485869765281677,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 1178
    },
    {
      "clip_ratio/high_max": 0.0015608993235218804,
      "clip_ratio/high_mean": 0.0006532988390972605,
      "clip_ratio/low_mean": 0.00038358610345312627,
      "clip_ratio/low_min": 1.3867317647964228e-05,
      "clip_ratio/region_mean": 0.0010368849434598815,
      "completions/clipped_ratio": 0.1439732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3859.0,
      "completions/mean_length": 1100.68310546875,
      "completions/mean_terminated_length": 596.9074096679688,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 11.018658892128279,
      "grad_norm": 0.22719907760620117,
      "learning_rate": 1e-06,
      "loss": -0.0463,
      "num_tokens": 649699669.0,
      "reward": 0.598214328289032,
      "reward_std": 0.14011907577514648,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 1179
    },
    {
      "clip_ratio/high_max": 0.001578674309712369,
      "clip_ratio/high_mean": 0.0005197370301175397,
      "clip_ratio/low_mean": 0.00040229929800261743,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009220363372151041,
      "completions/clipped_ratio": 0.1462053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3810.0,
      "completions/mean_length": 1035.2388916015625,
      "completions/mean_terminated_length": 511.1085205078125,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 11.02798833819242,
      "grad_norm": 0.4742317795753479,
      "learning_rate": 1e-06,
      "loss": -0.0464,
      "num_tokens": 650172035.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.12835979461669922,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.4816865026950836,
      "step": 1180
    },
    {
      "clip_ratio/high_max": 0.0018998483137693256,
      "clip_ratio/high_mean": 0.0007278305856743827,
      "clip_ratio/low_mean": 0.00037418897682073293,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011020195488526952,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2734.0,
      "completions/mean_length": 978.14404296875,
      "completions/mean_terminated_length": 500.6344909667969,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 11.03731778425656,
      "grad_norm": 0.3449740409851074,
      "learning_rate": 1e-06,
      "loss": -0.0711,
      "num_tokens": 650647180.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.15537208318710327,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 1181
    },
    {
      "clip_ratio/high_max": 0.0015139637580432463,
      "clip_ratio/high_mean": 0.0005015873794036452,
      "clip_ratio/low_mean": 0.0003548100041825819,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008563973942727898,
      "completions/clipped_ratio": 0.1450892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3161.0,
      "completions/mean_length": 1060.3739013671875,
      "completions/mean_terminated_length": 545.1893310546875,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 11.0466472303207,
      "grad_norm": 0.21541571617126465,
      "learning_rate": 1e-06,
      "loss": -0.0705,
      "num_tokens": 651151259.0,
      "reward": 0.637276828289032,
      "reward_std": 0.12651576101779938,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 1182
    },
    {
      "clip_ratio/high_max": 0.001237130420122412,
      "clip_ratio/high_mean": 0.00043247520306977094,
      "clip_ratio/low_mean": 0.0003234145438000269,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007558897486887872,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3711.0,
      "completions/mean_length": 864.3471069335938,
      "completions/mean_terminated_length": 534.42431640625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 11.055976676384839,
      "grad_norm": 4.377254962921143,
      "learning_rate": 1e-06,
      "loss": -0.0372,
      "num_tokens": 651675130.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.12456366419792175,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.46100425720214844,
      "step": 1183
    },
    {
      "clip_ratio/high_max": 0.0019570005460991524,
      "clip_ratio/high_mean": 0.0006469885356636951,
      "clip_ratio/low_mean": 0.0004330397459852975,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010800282616401091,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3125.0,
      "completions/mean_length": 976.755615234375,
      "completions/mean_terminated_length": 562.6965942382812,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 11.06530612244898,
      "grad_norm": 1.472005844116211,
      "learning_rate": 1e-06,
      "loss": -0.0461,
      "num_tokens": 652216159.0,
      "reward": 0.6328125,
      "reward_std": 0.13154789805412292,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 1184
    },
    {
      "clip_ratio/high_max": 0.0019672623384394683,
      "clip_ratio/high_mean": 0.0007594211947434815,
      "clip_ratio/low_mean": 0.00042961725557688624,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011890384485013783,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3493.0,
      "completions/mean_length": 1023.700927734375,
      "completions/mean_terminated_length": 553.1685791015625,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 11.07463556851312,
      "grad_norm": 0.4396667778491974,
      "learning_rate": 1e-06,
      "loss": -0.0512,
      "num_tokens": 652735523.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.16266193985939026,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 1185
    },
    {
      "clip_ratio/high_max": 0.0020886435340798926,
      "clip_ratio/high_mean": 0.0005994874472889933,
      "clip_ratio/low_mean": 0.0002933752721219207,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008928627175919246,
      "completions/clipped_ratio": 0.1495535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3389.0,
      "completions/mean_length": 1083.154052734375,
      "completions/mean_terminated_length": 553.3359375,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 11.08396501457726,
      "grad_norm": 0.1828635334968567,
      "learning_rate": 1e-06,
      "loss": -0.056,
      "num_tokens": 653252093.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.11907366663217545,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 1186
    },
    {
      "clip_ratio/high_max": 0.0019462131022009999,
      "clip_ratio/high_mean": 0.0007399519618047634,
      "clip_ratio/low_mean": 0.00045357361727837997,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001193525571579812,
      "completions/clipped_ratio": 0.1395089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2473.0,
      "completions/mean_length": 1007.90185546875,
      "completions/mean_terminated_length": 507.23736572265625,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 11.093294460641399,
      "grad_norm": 0.2509366571903229,
      "learning_rate": 1e-06,
      "loss": -0.076,
      "num_tokens": 653736245.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.14992554485797882,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 1187
    },
    {
      "clip_ratio/high_max": 0.0019079922931268811,
      "clip_ratio/high_mean": 0.0008365356461581541,
      "clip_ratio/low_mean": 0.00040288263244292466,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012394182631396689,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3518.0,
      "completions/mean_length": 982.1350708007812,
      "completions/mean_terminated_length": 582.1171264648438,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 11.102623906705539,
      "grad_norm": 1.9869697093963623,
      "learning_rate": 1e-06,
      "loss": -0.0591,
      "num_tokens": 654298454.0,
      "reward": 0.6328125,
      "reward_std": 0.17085081338882446,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 1188
    },
    {
      "clip_ratio/high_max": 0.0020058422014699318,
      "clip_ratio/high_mean": 0.0007086117475409992,
      "clip_ratio/low_mean": 0.00030152343833833584,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010101351435878314,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2463.0,
      "completions/mean_length": 929.2332763671875,
      "completions/mean_terminated_length": 486.0470886230469,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 11.11195335276968,
      "grad_norm": 0.2725389301776886,
      "learning_rate": 1e-06,
      "loss": -0.0576,
      "num_tokens": 654755831.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.1317012757062912,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422144770622253,
      "step": 1189
    },
    {
      "clip_ratio/high_max": 0.0017150229396065697,
      "clip_ratio/high_mean": 0.0006754527330485871,
      "clip_ratio/low_mean": 0.0003505973147639452,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010260500803269679,
      "completions/clipped_ratio": 0.1595982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3999.0,
      "completions/mean_length": 1104.3304443359375,
      "completions/mean_terminated_length": 536.1912231445312,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 11.12128279883382,
      "grad_norm": 0.3129972219467163,
      "learning_rate": 1e-06,
      "loss": -0.061,
      "num_tokens": 655254359.0,
      "reward": 0.637276828289032,
      "reward_std": 0.15169291198253632,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 1190
    },
    {
      "clip_ratio/high_max": 0.0015929283217701595,
      "clip_ratio/high_mean": 0.000467680783913238,
      "clip_ratio/low_mean": 0.00022705319588567363,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006947339988983003,
      "completions/clipped_ratio": 0.1428571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2747.0,
      "completions/mean_length": 1011.0692138671875,
      "completions/mean_terminated_length": 496.9140625,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 11.130612244897959,
      "grad_norm": 0.3865568935871124,
      "learning_rate": 1e-06,
      "loss": -0.0385,
      "num_tokens": 655713061.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.0892007052898407,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 1191
    },
    {
      "clip_ratio/high_max": 0.0014956098784750793,
      "clip_ratio/high_mean": 0.000487194334709784,
      "clip_ratio/low_mean": 0.00025403599784112885,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007412303348246496,
      "completions/clipped_ratio": 0.1551339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3757.0,
      "completions/mean_length": 1117.485595703125,
      "completions/mean_terminated_length": 570.572021484375,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 11.139941690962099,
      "grad_norm": 0.31990551948547363,
      "learning_rate": 1e-06,
      "loss": -0.0382,
      "num_tokens": 656248016.0,
      "reward": 0.551339328289032,
      "reward_std": 0.1107742190361023,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 1192
    },
    {
      "clip_ratio/high_max": 0.002139115455065621,
      "clip_ratio/high_mean": 0.0006330867236101767,
      "clip_ratio/low_mean": 0.0003242451542746494,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009573318893671967,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3669.0,
      "completions/mean_length": 900.2042846679688,
      "completions/mean_terminated_length": 556.52783203125,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 11.14927113702624,
      "grad_norm": 0.2887069880962372,
      "learning_rate": 1e-06,
      "loss": -0.0654,
      "num_tokens": 656782271.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.1191510558128357,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599096059799194,
      "step": 1193
    },
    {
      "clip_ratio/high_max": 0.0018818424177879933,
      "clip_ratio/high_mean": 0.0006496303103631362,
      "clip_ratio/low_mean": 0.00044450499535741983,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010941352920781355,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3941.0,
      "completions/mean_length": 999.5480346679688,
      "completions/mean_terminated_length": 534.4839477539062,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 11.15860058309038,
      "grad_norm": 0.2673981785774231,
      "learning_rate": 1e-06,
      "loss": -0.0632,
      "num_tokens": 657283130.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.13711389899253845,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1194
    },
    {
      "clip_ratio/high_max": 0.002027008975346689,
      "clip_ratio/high_mean": 0.0006321159180515679,
      "clip_ratio/low_mean": 0.0004025810528673901,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001034696979331784,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3522.0,
      "completions/mean_length": 957.07373046875,
      "completions/mean_terminated_length": 513.2254638671875,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 11.167930029154519,
      "grad_norm": 0.23403170704841614,
      "learning_rate": 1e-06,
      "loss": -0.071,
      "num_tokens": 657787980.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.1152033880352974,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 1195
    },
    {
      "clip_ratio/high_max": 0.0016585695921094157,
      "clip_ratio/high_mean": 0.0006414505151042249,
      "clip_ratio/low_mean": 0.0005228253835412033,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011642758909147233,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3893.0,
      "completions/mean_length": 981.00341796875,
      "completions/mean_terminated_length": 554.0748901367188,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 11.177259475218658,
      "grad_norm": 0.4831053912639618,
      "learning_rate": 1e-06,
      "loss": -0.0412,
      "num_tokens": 658303335.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.14977401494979858,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 1196
    },
    {
      "clip_ratio/high_max": 0.002041688341705594,
      "clip_ratio/high_mean": 0.0007752884885121603,
      "clip_ratio/low_mean": 0.0003015225431681756,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010768110296339728,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2148.0,
      "completions/mean_length": 844.7533569335938,
      "completions/mean_terminated_length": 481.7109069824219,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 11.186588921282798,
      "grad_norm": 0.2372497320175171,
      "learning_rate": 1e-06,
      "loss": -0.0476,
      "num_tokens": 658778250.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.1347053349018097,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 1197
    },
    {
      "clip_ratio/high_max": 0.0018739405095402617,
      "clip_ratio/high_mean": 0.0005323157165548764,
      "clip_ratio/low_mean": 0.00033523128422530135,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008675469871377572,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2979.0,
      "completions/mean_length": 991.8795166015625,
      "completions/mean_terminated_length": 530.2410278320312,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 11.19591836734694,
      "grad_norm": 0.18299052119255066,
      "learning_rate": 1e-06,
      "loss": -0.0515,
      "num_tokens": 659276254.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.10367163270711899,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140389680862427,
      "step": 1198
    },
    {
      "clip_ratio/high_max": 0.0013865770124539267,
      "clip_ratio/high_mean": 0.0004048795308335684,
      "clip_ratio/low_mean": 0.0002652573957675486,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006701369184156647,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2709.0,
      "completions/mean_length": 911.5803833007812,
      "completions/mean_terminated_length": 516.0250854492188,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 11.205247813411079,
      "grad_norm": 0.1874816119670868,
      "learning_rate": 1e-06,
      "loss": -0.0405,
      "num_tokens": 659783238.0,
      "reward": 0.684151828289032,
      "reward_std": 0.10107900947332382,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 1199
    },
    {
      "clip_ratio/high_max": 0.0018549531014286913,
      "clip_ratio/high_mean": 0.0007177303068601759,
      "clip_ratio/low_mean": 0.0003987469649473496,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00111647726589581,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2364.0,
      "completions/mean_length": 886.935302734375,
      "completions/mean_terminated_length": 533.0235595703125,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 11.214577259475218,
      "grad_norm": 0.2875356376171112,
      "learning_rate": 1e-06,
      "loss": -0.0482,
      "num_tokens": 660308596.0,
      "reward": 0.715401828289032,
      "reward_std": 0.15646544098854065,
      "rewards/verify_math_reward/mean": 0.7154017686843872,
      "rewards/verify_math_reward/std": 0.4514748752117157,
      "step": 1200
    },
    {
      "clip_ratio/high_max": 0.0015939300137688406,
      "clip_ratio/high_mean": 0.0005759891691923258,
      "clip_ratio/low_mean": 0.0005601890934485709,
      "clip_ratio/low_min": 1.6933079677983187e-05,
      "clip_ratio/region_mean": 0.001136178234446561,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2480.0,
      "completions/mean_length": 877.9141235351562,
      "completions/mean_terminated_length": 549.3763427734375,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 11.223906705539358,
      "grad_norm": 0.3419424295425415,
      "learning_rate": 1e-06,
      "loss": -0.028,
      "num_tokens": 660837935.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.14905862510204315,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 1201
    },
    {
      "clip_ratio/high_max": 0.0017622757513890974,
      "clip_ratio/high_mean": 0.0006267519165703561,
      "clip_ratio/low_mean": 0.0004762813796332921,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011030332898371853,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3329.0,
      "completions/mean_length": 898.3035888671875,
      "completions/mean_terminated_length": 563.1565551757812,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 11.2332361516035,
      "grad_norm": 0.19576263427734375,
      "learning_rate": 1e-06,
      "loss": -0.0551,
      "num_tokens": 661381743.0,
      "reward": 0.6953125596046448,
      "reward_std": 0.1301603466272354,
      "rewards/verify_math_reward/mean": 0.6953125,
      "rewards/verify_math_reward/std": 0.4605320394039154,
      "step": 1202
    },
    {
      "clip_ratio/high_max": 0.002221898721472826,
      "clip_ratio/high_mean": 0.0008242839394370094,
      "clip_ratio/low_mean": 0.0003053017858292151,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011295857293589506,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3709.0,
      "completions/mean_length": 946.91748046875,
      "completions/mean_terminated_length": 506.20611572265625,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 11.242565597667639,
      "grad_norm": 0.31083205342292786,
      "learning_rate": 1e-06,
      "loss": -0.0937,
      "num_tokens": 661866053.0,
      "reward": 0.691964328289032,
      "reward_std": 0.1568765938282013,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 1203
    },
    {
      "clip_ratio/high_max": 0.0025405114611203317,
      "clip_ratio/high_mean": 0.0008033487756620161,
      "clip_ratio/low_mean": 0.0003372556275280658,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001140604394095135,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2117.0,
      "completions/mean_length": 896.91748046875,
      "completions/mean_terminated_length": 508.5431823730469,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 11.251895043731778,
      "grad_norm": 0.243963360786438,
      "learning_rate": 1e-06,
      "loss": -0.06,
      "num_tokens": 662365531.0,
      "reward": 0.7109375596046448,
      "reward_std": 0.1301603466272354,
      "rewards/verify_math_reward/mean": 0.7109375,
      "rewards/verify_math_reward/std": 0.45358020067214966,
      "step": 1204
    },
    {
      "clip_ratio/high_max": 0.0023471055210393388,
      "clip_ratio/high_mean": 0.000682915258948924,
      "clip_ratio/low_mean": 0.0002559357396876294,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009388509988639271,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1902.0,
      "completions/mean_length": 941.4788208007812,
      "completions/mean_terminated_length": 545.18212890625,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 11.261224489795918,
      "grad_norm": 0.21252906322479248,
      "learning_rate": 1e-06,
      "loss": -0.0584,
      "num_tokens": 662909056.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.1277536004781723,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321895837783813,
      "step": 1205
    },
    {
      "clip_ratio/high_max": 0.0018565724603831768,
      "clip_ratio/high_mean": 0.0007533611615144764,
      "clip_ratio/low_mean": 0.00039845382980274735,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001151814960394404,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3718.0,
      "completions/mean_length": 807.9185791015625,
      "completions/mean_terminated_length": 503.1695251464844,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 11.270553935860057,
      "grad_norm": 0.2475420981645584,
      "learning_rate": 1e-06,
      "loss": -0.0444,
      "num_tokens": 663412591.0,
      "reward": 0.676339328289032,
      "reward_std": 0.14905793964862823,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335985660553,
      "step": 1206
    },
    {
      "clip_ratio/high_max": 0.0022256549273151904,
      "clip_ratio/high_mean": 0.0008872234229784226,
      "clip_ratio/low_mean": 0.0003282145335106179,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012154379546700511,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2261.0,
      "completions/mean_length": 996.9074096679688,
      "completions/mean_terminated_length": 522.2715454101562,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 11.279883381924199,
      "grad_norm": 0.7276267409324646,
      "learning_rate": 1e-06,
      "loss": -0.0656,
      "num_tokens": 663904524.0,
      "reward": 0.6328125,
      "reward_std": 0.15413424372673035,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 1207
    },
    {
      "clip_ratio/high_max": 0.0013603698025690392,
      "clip_ratio/high_mean": 0.0004850970781262731,
      "clip_ratio/low_mean": 0.00046215504994506773,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009472521396673983,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2451.0,
      "completions/mean_length": 861.8348388671875,
      "completions/mean_terminated_length": 518.454345703125,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 11.289212827988338,
      "grad_norm": 0.38679319620132446,
      "learning_rate": 1e-06,
      "loss": -0.0291,
      "num_tokens": 664417440.0,
      "reward": 0.637276828289032,
      "reward_std": 0.11329153180122375,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 1208
    },
    {
      "clip_ratio/high_max": 0.001770718543411931,
      "clip_ratio/high_mean": 0.0006499004157376476,
      "clip_ratio/low_mean": 0.0002893336481974984,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009392340580234304,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3344.0,
      "completions/mean_length": 799.7678833007812,
      "completions/mean_terminated_length": 476.60784912109375,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 11.298542274052478,
      "grad_norm": 0.2946408987045288,
      "learning_rate": 1e-06,
      "loss": -0.0565,
      "num_tokens": 664894040.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.11028888076543808,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 1209
    },
    {
      "clip_ratio/high_max": 0.0017693006157060154,
      "clip_ratio/high_mean": 0.0006510997354780557,
      "clip_ratio/low_mean": 0.0002872747079436522,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009383744290971663,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3840.0,
      "completions/mean_length": 755.3292846679688,
      "completions/mean_terminated_length": 502.6734619140625,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 11.307871720116617,
      "grad_norm": 0.20046192407608032,
      "learning_rate": 1e-06,
      "loss": -0.0408,
      "num_tokens": 665392023.0,
      "reward": 0.754464328289032,
      "reward_std": 0.10318448394536972,
      "rewards/verify_math_reward/mean": 0.7544642686843872,
      "rewards/verify_math_reward/std": 0.4306447505950928,
      "step": 1210
    },
    {
      "clip_ratio/high_max": 0.0023193726847239304,
      "clip_ratio/high_mean": 0.0007375519835477462,
      "clip_ratio/low_mean": 0.00037769984646729426,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011152518127346411,
      "completions/clipped_ratio": 0.1506696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2980.0,
      "completions/mean_length": 1107.86279296875,
      "completions/mean_terminated_length": 577.7726440429688,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 11.317201166180759,
      "grad_norm": 0.27446576952934265,
      "learning_rate": 1e-06,
      "loss": -0.0533,
      "num_tokens": 665931924.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.13403385877609253,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321895837783813,
      "step": 1211
    },
    {
      "clip_ratio/high_max": 0.0021604953799396753,
      "clip_ratio/high_mean": 0.0007509531460527796,
      "clip_ratio/low_mean": 0.0003034861226751673,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010544392389419954,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2102.0,
      "completions/mean_length": 929.0960083007812,
      "completions/mean_terminated_length": 499.6172180175781,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 11.326530612244898,
      "grad_norm": 0.2643359899520874,
      "learning_rate": 1e-06,
      "loss": -0.0468,
      "num_tokens": 666412626.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.13193020224571228,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 1212
    },
    {
      "clip_ratio/high_max": 0.0015155598448473029,
      "clip_ratio/high_mean": 0.0005129636801939341,
      "clip_ratio/low_mean": 0.00033966854152822634,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008526322199031711,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1765.0,
      "completions/mean_length": 801.5859985351562,
      "completions/mean_terminated_length": 500.6346130371094,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 11.335860058309038,
      "grad_norm": 0.27162256836891174,
      "learning_rate": 1e-06,
      "loss": -0.0225,
      "num_tokens": 666900975.0,
      "reward": 0.7198660969734192,
      "reward_std": 0.11047180742025375,
      "rewards/verify_math_reward/mean": 0.7198660969734192,
      "rewards/verify_math_reward/std": 0.44931530952453613,
      "step": 1213
    },
    {
      "clip_ratio/high_max": 0.0015463138843188062,
      "clip_ratio/high_mean": 0.0005077983978480916,
      "clip_ratio/low_mean": 0.00028306973149483383,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007908681291155517,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4096.0,
      "completions/mean_length": 850.4688110351562,
      "completions/mean_terminated_length": 505.8815002441406,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 11.345189504373177,
      "grad_norm": 0.182473286986351,
      "learning_rate": 1e-06,
      "loss": -0.0396,
      "num_tokens": 667398955.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.09709673374891281,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 1214
    },
    {
      "clip_ratio/high_max": 0.002268584561534226,
      "clip_ratio/high_mean": 0.0007521238931076368,
      "clip_ratio/low_mean": 0.0005650601628985896,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013171840928407619,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1998.0,
      "completions/mean_length": 1009.47216796875,
      "completions/mean_terminated_length": 568.53955078125,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 11.354518950437317,
      "grad_norm": 0.27947527170181274,
      "learning_rate": 1e-06,
      "loss": -0.0558,
      "num_tokens": 667939146.0,
      "reward": 0.65625,
      "reward_std": 0.14466407895088196,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 1215
    },
    {
      "clip_ratio/high_max": 0.001618959940969944,
      "clip_ratio/high_mean": 0.0006013408369653916,
      "clip_ratio/low_mean": 0.00033932765779809415,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009406685057911091,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4078.0,
      "completions/mean_length": 947.9922485351562,
      "completions/mean_terminated_length": 570.2312622070312,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 11.363848396501458,
      "grad_norm": 0.23285862803459167,
      "learning_rate": 1e-06,
      "loss": -0.0246,
      "num_tokens": 668477155.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.13350322842597961,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159182548523,
      "step": 1216
    },
    {
      "clip_ratio/high_max": 0.0015124055862543173,
      "clip_ratio/high_mean": 0.0005476295182234026,
      "clip_ratio/low_mean": 0.00036976215733375284,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009173916914733127,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3562.0,
      "completions/mean_length": 963.3270263671875,
      "completions/mean_terminated_length": 524.9122314453125,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 11.373177842565598,
      "grad_norm": 0.26881372928619385,
      "learning_rate": 1e-06,
      "loss": -0.0432,
      "num_tokens": 668989984.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.13016216456890106,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1217
    },
    {
      "clip_ratio/high_max": 0.0014185413492668886,
      "clip_ratio/high_mean": 0.0005190686224523233,
      "clip_ratio/low_mean": 0.00032771823771327036,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008467868610750884,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3212.0,
      "completions/mean_length": 1001.3504638671875,
      "completions/mean_terminated_length": 541.1205444335938,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 11.382507288629737,
      "grad_norm": 0.27219751477241516,
      "learning_rate": 1e-06,
      "loss": -0.0392,
      "num_tokens": 669507074.0,
      "reward": 0.6595982313156128,
      "reward_std": 0.11855372786521912,
      "rewards/verify_math_reward/mean": 0.6595982313156128,
      "rewards/verify_math_reward/std": 0.4741089344024658,
      "step": 1218
    },
    {
      "clip_ratio/high_max": 0.0015868601949478034,
      "clip_ratio/high_mean": 0.0005713184427804663,
      "clip_ratio/low_mean": 0.00044737579719367204,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010186942463406012,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2285.0,
      "completions/mean_length": 871.8114013671875,
      "completions/mean_terminated_length": 511.7903137207031,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 11.391836734693877,
      "grad_norm": 0.29111766815185547,
      "learning_rate": 1e-06,
      "loss": -0.0404,
      "num_tokens": 670009601.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.11663442850112915,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 1219
    },
    {
      "clip_ratio/high_max": 0.001709231373752118,
      "clip_ratio/high_mean": 0.0006412777138393722,
      "clip_ratio/low_mean": 0.0003243132555326156,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000965590948908357,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3582.0,
      "completions/mean_length": 907.5926513671875,
      "completions/mean_terminated_length": 524.9837646484375,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 11.401166180758018,
      "grad_norm": 0.21829597651958466,
      "learning_rate": 1e-06,
      "loss": -0.0403,
      "num_tokens": 670517492.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.13324150443077087,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 1220
    },
    {
      "clip_ratio/high_max": 0.0018303882025065832,
      "clip_ratio/high_mean": 0.0007279628689502715,
      "clip_ratio/low_mean": 0.00040208381346928945,
      "clip_ratio/low_min": 3.4722223062999547e-05,
      "clip_ratio/region_mean": 0.0011300467122055124,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2121.0,
      "completions/mean_length": 1006.7991333007812,
      "completions/mean_terminated_length": 551.9231567382812,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 11.410495626822158,
      "grad_norm": 0.2785889804363251,
      "learning_rate": 1e-06,
      "loss": -0.043,
      "num_tokens": 671030944.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.14613084495067596,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 1221
    },
    {
      "clip_ratio/high_max": 0.0018251664550916757,
      "clip_ratio/high_mean": 0.0006509021695819683,
      "clip_ratio/low_mean": 0.0003536196527420543,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010045218368759379,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2393.0,
      "completions/mean_length": 1002.72216796875,
      "completions/mean_terminated_length": 556.310302734375,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 11.419825072886297,
      "grad_norm": 0.2255057394504547,
      "learning_rate": 1e-06,
      "loss": -0.0926,
      "num_tokens": 671553927.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.14451110363006592,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 1222
    },
    {
      "clip_ratio/high_max": 0.0018701410735957325,
      "clip_ratio/high_mean": 0.0006567813634319464,
      "clip_ratio/low_mean": 0.0004231369225635717,
      "clip_ratio/low_min": 1.3898154065827839e-05,
      "clip_ratio/region_mean": 0.0010799182800838025,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2782.0,
      "completions/mean_length": 911.1629638671875,
      "completions/mean_terminated_length": 598.9240112304688,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 11.429154518950437,
      "grad_norm": 0.2082158774137497,
      "learning_rate": 1e-06,
      "loss": -0.015,
      "num_tokens": 672122753.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.13200436532497406,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975659370422363,
      "step": 1223
    },
    {
      "clip_ratio/high_max": 0.0015425887959281681,
      "clip_ratio/high_mean": 0.00043351100589461566,
      "clip_ratio/low_mean": 0.00037760764553240733,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008111186634778278,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3173.0,
      "completions/mean_length": 865.2266235351562,
      "completions/mean_terminated_length": 491.0522766113281,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 11.438483965014576,
      "grad_norm": 0.22037480771541595,
      "learning_rate": 1e-06,
      "loss": -0.0654,
      "num_tokens": 672601212.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.11234975606203079,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692258834839,
      "step": 1224
    },
    {
      "clip_ratio/high_max": 0.0014426990892388858,
      "clip_ratio/high_mean": 0.0005319876636349363,
      "clip_ratio/low_mean": 0.00028383291578393255,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008158205673680641,
      "completions/clipped_ratio": 0.1395089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2694.0,
      "completions/mean_length": 1066.0670166015625,
      "completions/mean_terminated_length": 574.8327026367188,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 11.447813411078718,
      "grad_norm": 1.8511788845062256,
      "learning_rate": 1e-06,
      "loss": -0.0309,
      "num_tokens": 673136368.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.12253418564796448,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 1225
    },
    {
      "clip_ratio/high_max": 0.002710782464419026,
      "clip_ratio/high_mean": 0.0009395659435540438,
      "clip_ratio/low_mean": 0.0004538843431873829,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013934503076598048,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2999.0,
      "completions/mean_length": 969.044677734375,
      "completions/mean_terminated_length": 531.4300537109375,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 11.457142857142857,
      "grad_norm": 1.7392948865890503,
      "learning_rate": 1e-06,
      "loss": -0.044,
      "num_tokens": 673630488.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.15582531690597534,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1226
    },
    {
      "clip_ratio/high_max": 0.002148006526113022,
      "clip_ratio/high_mean": 0.0008222622636822052,
      "clip_ratio/low_mean": 0.00033328503150187316,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011555473138287198,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2454.0,
      "completions/mean_length": 887.09716796875,
      "completions/mean_terminated_length": 515.4557495117188,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 11.466472303206997,
      "grad_norm": 0.2571759521961212,
      "learning_rate": 1e-06,
      "loss": -0.0339,
      "num_tokens": 674126407.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.14684367179870605,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179922461509705,
      "step": 1227
    },
    {
      "clip_ratio/high_max": 0.0019398934964556247,
      "clip_ratio/high_mean": 0.000737077415578824,
      "clip_ratio/low_mean": 0.0002981383008773264,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010352157023589825,
      "completions/clipped_ratio": 0.1618303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3930.0,
      "completions/mean_length": 1102.921875,
      "completions/mean_terminated_length": 525.0305786132812,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 11.475801749271136,
      "grad_norm": 0.2206743210554123,
      "learning_rate": 1e-06,
      "loss": -0.0705,
      "num_tokens": 674605305.0,
      "reward": 0.613839328289032,
      "reward_std": 0.14004239439964294,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 1228
    },
    {
      "clip_ratio/high_max": 0.0017158561931864824,
      "clip_ratio/high_mean": 0.0005499132798831852,
      "clip_ratio/low_mean": 0.0003985656717304664,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009484789752605138,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2172.0,
      "completions/mean_length": 895.8482666015625,
      "completions/mean_terminated_length": 475.6262512207031,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 11.485131195335278,
      "grad_norm": 0.2786537706851959,
      "learning_rate": 1e-06,
      "loss": -0.0511,
      "num_tokens": 675054073.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.13203828036785126,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 1229
    },
    {
      "clip_ratio/high_max": 0.002075848875392694,
      "clip_ratio/high_mean": 0.0008127826840791386,
      "clip_ratio/low_mean": 0.00030058499078222667,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011133676816825755,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3280.0,
      "completions/mean_length": 886.1685791015625,
      "completions/mean_terminated_length": 549.7496948242188,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 11.494460641399417,
      "grad_norm": 3.2610578536987305,
      "learning_rate": 1e-06,
      "loss": -0.0529,
      "num_tokens": 675591776.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.1626594066619873,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 1230
    },
    {
      "clip_ratio/high_max": 0.002299333555129124,
      "clip_ratio/high_mean": 0.000734691251636832,
      "clip_ratio/low_mean": 0.00036892001207888825,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011036112609872362,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2260.0,
      "completions/mean_length": 958.3582763671875,
      "completions/mean_terminated_length": 523.7928466796875,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 11.503790087463557,
      "grad_norm": 0.5798492431640625,
      "learning_rate": 1e-06,
      "loss": -0.0616,
      "num_tokens": 676097265.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.13019494712352753,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 1231
    },
    {
      "clip_ratio/high_max": 0.0020301885961089283,
      "clip_ratio/high_mean": 0.0006928197844899842,
      "clip_ratio/low_mean": 0.0004166926501056878,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001109512410039315,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3057.0,
      "completions/mean_length": 863.380615234375,
      "completions/mean_terminated_length": 537.73583984375,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 11.513119533527696,
      "grad_norm": 0.3225386440753937,
      "learning_rate": 1e-06,
      "loss": -0.0313,
      "num_tokens": 676626278.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.1191510558128357,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 1232
    },
    {
      "clip_ratio/high_max": 0.002011083000979852,
      "clip_ratio/high_mean": 0.0007108403224265203,
      "clip_ratio/low_mean": 0.0002939858062518397,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001004826135613257,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2558.0,
      "completions/mean_length": 935.97998046875,
      "completions/mean_terminated_length": 534.5182495117188,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 11.522448979591836,
      "grad_norm": 0.2137346714735031,
      "learning_rate": 1e-06,
      "loss": -0.0589,
      "num_tokens": 677133660.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.14473934471607208,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 1233
    },
    {
      "clip_ratio/high_max": 0.0016349138531950302,
      "clip_ratio/high_mean": 0.0005710250661650207,
      "clip_ratio/low_mean": 0.0003529160185280489,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009239410883310484,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2994.0,
      "completions/mean_length": 926.4553833007812,
      "completions/mean_terminated_length": 554.9625854492188,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 11.531778425655977,
      "grad_norm": 0.1969604343175888,
      "learning_rate": 1e-06,
      "loss": -0.0663,
      "num_tokens": 677670220.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.12422666698694229,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 1234
    },
    {
      "clip_ratio/high_max": 0.001964829079952324,
      "clip_ratio/high_mean": 0.000721224840162904,
      "clip_ratio/low_mean": 0.0002398229823938891,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009610478173271986,
      "completions/clipped_ratio": 0.1428571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3976.0,
      "completions/mean_length": 1073.9788818359375,
      "completions/mean_terminated_length": 570.30859375,
      "completions/min_length": 79.0,
      "completions/min_terminated_length": 79.0,
      "epoch": 11.541107871720117,
      "grad_norm": 0.22050292789936066,
      "learning_rate": 1e-06,
      "loss": -0.07,
      "num_tokens": 678196345.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.13752757012844086,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 1235
    },
    {
      "clip_ratio/high_max": 0.0018077477980114054,
      "clip_ratio/high_mean": 0.0006138875123724574,
      "clip_ratio/low_mean": 0.0003746912752831122,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009885787840175908,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3746.0,
      "completions/mean_length": 1013.685302734375,
      "completions/mean_terminated_length": 582.3180541992188,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 11.550437317784256,
      "grad_norm": 0.2914097011089325,
      "learning_rate": 1e-06,
      "loss": -0.0516,
      "num_tokens": 678738279.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.12317540496587753,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 1236
    },
    {
      "clip_ratio/high_max": 0.0020769256880157627,
      "clip_ratio/high_mean": 0.0007838482542865677,
      "clip_ratio/low_mean": 0.0006062004013074329,
      "clip_ratio/low_min": 1.75168170244433e-05,
      "clip_ratio/region_mean": 0.0013900486737838946,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3955.0,
      "completions/mean_length": 977.091552734375,
      "completions/mean_terminated_length": 545.1206665039062,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 11.559766763848396,
      "grad_norm": 0.3013603985309601,
      "learning_rate": 1e-06,
      "loss": -0.0728,
      "num_tokens": 679272337.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.156651571393013,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 1237
    },
    {
      "clip_ratio/high_max": 0.0019671481240948197,
      "clip_ratio/high_mean": 0.0007069959083310096,
      "clip_ratio/low_mean": 0.00036611780433304375,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001073113686288707,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2136.0,
      "completions/mean_length": 891.2109985351562,
      "completions/mean_terminated_length": 542.1744995117188,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 11.569096209912537,
      "grad_norm": 0.3788949251174927,
      "learning_rate": 1e-06,
      "loss": -0.0621,
      "num_tokens": 679798614.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.15507742762565613,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 1238
    },
    {
      "clip_ratio/high_max": 0.0014797958465351257,
      "clip_ratio/high_mean": 0.000537880518095335,
      "clip_ratio/low_mean": 0.0003793309106185916,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009172114514512941,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2733.0,
      "completions/mean_length": 793.6295166015625,
      "completions/mean_terminated_length": 531.0313110351562,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 11.578425655976677,
      "grad_norm": 0.39991965889930725,
      "learning_rate": 1e-06,
      "loss": -0.0315,
      "num_tokens": 680326818.0,
      "reward": 0.6953125596046448,
      "reward_std": 0.12628935277462006,
      "rewards/verify_math_reward/mean": 0.6953125,
      "rewards/verify_math_reward/std": 0.4605320394039154,
      "step": 1239
    },
    {
      "clip_ratio/high_max": 0.0015092982448550174,
      "clip_ratio/high_mean": 0.0005479157616719021,
      "clip_ratio/low_mean": 0.0001647114479510492,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007126272200821404,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2524.0,
      "completions/mean_length": 1020.1842041015625,
      "completions/mean_terminated_length": 512.2145385742188,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 11.587755102040816,
      "grad_norm": 0.27111905813217163,
      "learning_rate": 1e-06,
      "loss": -0.0542,
      "num_tokens": 680815063.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.0922047421336174,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1240
    },
    {
      "clip_ratio/high_max": 0.0017421358643332496,
      "clip_ratio/high_mean": 0.0006400034226317075,
      "clip_ratio/low_mean": 0.0006353565436256758,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012753599276038585,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2287.0,
      "completions/mean_length": 963.450927734375,
      "completions/mean_terminated_length": 525.053466796875,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 11.597084548104956,
      "grad_norm": 0.29176339507102966,
      "learning_rate": 1e-06,
      "loss": -0.0386,
      "num_tokens": 681332923.0,
      "reward": 0.625,
      "reward_std": 0.13470715284347534,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1241
    },
    {
      "clip_ratio/high_max": 0.001956000065547414,
      "clip_ratio/high_mean": 0.0006792026015318697,
      "clip_ratio/low_mean": 0.00037941193295409903,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010586145435809158,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3704.0,
      "completions/mean_length": 940.56591796875,
      "completions/mean_terminated_length": 548.6110229492188,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 11.606413994169095,
      "grad_norm": 0.23168037831783295,
      "learning_rate": 1e-06,
      "loss": -0.0385,
      "num_tokens": 681872646.0,
      "reward": 0.59375,
      "reward_std": 0.13887983560562134,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 1242
    },
    {
      "clip_ratio/high_max": 0.0012582398649101378,
      "clip_ratio/high_mean": 0.00035168908289051615,
      "clip_ratio/low_mean": 0.00027861314674737514,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006303022291831439,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2416.0,
      "completions/mean_length": 991.114990234375,
      "completions/mean_terminated_length": 547.5599365234375,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 11.615743440233237,
      "grad_norm": 0.40971654653549194,
      "learning_rate": 1e-06,
      "loss": -0.0246,
      "num_tokens": 682390493.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.0925096645951271,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 1243
    },
    {
      "clip_ratio/high_max": 0.0018434973389958031,
      "clip_ratio/high_mean": 0.0006250663341234031,
      "clip_ratio/low_mean": 0.0002022051241965528,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008272714767372236,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3189.0,
      "completions/mean_length": 910.8951416015625,
      "completions/mean_terminated_length": 515.2546997070312,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 11.625072886297376,
      "grad_norm": 0.477720707654953,
      "learning_rate": 1e-06,
      "loss": -0.059,
      "num_tokens": 682889359.0,
      "reward": 0.738839328289032,
      "reward_std": 0.10911563783884048,
      "rewards/verify_math_reward/mean": 0.7388392686843872,
      "rewards/verify_math_reward/std": 0.439512699842453,
      "step": 1244
    },
    {
      "clip_ratio/high_max": 0.0018341023751418106,
      "clip_ratio/high_mean": 0.0005889217791263945,
      "clip_ratio/low_mean": 0.00040872691602089617,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000997648705379106,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3530.0,
      "completions/mean_length": 984.3917846679688,
      "completions/mean_terminated_length": 557.9276733398438,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 11.634402332361516,
      "grad_norm": 0.3170127272605896,
      "learning_rate": 1e-06,
      "loss": -0.0396,
      "num_tokens": 683416678.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.1159629374742508,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 1245
    },
    {
      "clip_ratio/high_max": 0.0016940337845881004,
      "clip_ratio/high_mean": 0.0006195336763994419,
      "clip_ratio/low_mean": 0.00040580420227342984,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010253378804918611,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3234.0,
      "completions/mean_length": 1029.696533203125,
      "completions/mean_terminated_length": 550.9574584960938,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 11.643731778425655,
      "grad_norm": 0.32127144932746887,
      "learning_rate": 1e-06,
      "loss": -0.0694,
      "num_tokens": 683932014.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.15270251035690308,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 1246
    },
    {
      "clip_ratio/high_max": 0.0016594558692304417,
      "clip_ratio/high_mean": 0.000595131909904012,
      "clip_ratio/low_mean": 0.0003704376595123904,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009655695648689289,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3752.0,
      "completions/mean_length": 953.7344360351562,
      "completions/mean_terminated_length": 523.0685424804688,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 11.653061224489797,
      "grad_norm": 0.24881182610988617,
      "learning_rate": 1e-06,
      "loss": -0.0798,
      "num_tokens": 684433376.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.1361381858587265,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 1247
    },
    {
      "clip_ratio/high_max": 0.0023023031317279674,
      "clip_ratio/high_mean": 0.0010129041438631248,
      "clip_ratio/low_mean": 0.00036400539784153807,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013769095603493042,
      "completions/clipped_ratio": 0.1517857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3806.0,
      "completions/mean_length": 1055.55810546875,
      "completions/mean_terminated_length": 511.4789733886719,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 11.662390670553936,
      "grad_norm": 0.2707832157611847,
      "learning_rate": 1e-06,
      "loss": -0.0906,
      "num_tokens": 684902212.0,
      "reward": 0.65625,
      "reward_std": 0.16604438424110413,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 1248
    },
    {
      "clip_ratio/high_max": 0.0018992138502653688,
      "clip_ratio/high_mean": 0.0005970289803371998,
      "clip_ratio/low_mean": 0.0004722713022147218,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010693002732296009,
      "completions/clipped_ratio": 0.1529017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3183.0,
      "completions/mean_length": 1094.62841796875,
      "completions/mean_terminated_length": 552.8787841796875,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 11.671720116618076,
      "grad_norm": 0.3415383994579315,
      "learning_rate": 1e-06,
      "loss": -0.0579,
      "num_tokens": 685406431.0,
      "reward": 0.6328125,
      "reward_std": 0.15638872981071472,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 1249
    },
    {
      "clip_ratio/high_max": 0.0018159186165576102,
      "clip_ratio/high_mean": 0.0006874466489534825,
      "clip_ratio/low_mean": 0.0004209075045764621,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011083541467087343,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3336.0,
      "completions/mean_length": 974.4420166015625,
      "completions/mean_terminated_length": 551.11279296875,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 11.681049562682215,
      "grad_norm": 0.5837641954421997,
      "learning_rate": 1e-06,
      "loss": -0.0623,
      "num_tokens": 685932931.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.14091001451015472,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 1250
    },
    {
      "clip_ratio/high_max": 0.0017719335919537116,
      "clip_ratio/high_mean": 0.0006121484057075577,
      "clip_ratio/low_mean": 0.0004516656276791764,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010638140465744073,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2529.0,
      "completions/mean_length": 981.82373046875,
      "completions/mean_terminated_length": 577.3341674804688,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 11.690379008746355,
      "grad_norm": 33.79458236694336,
      "learning_rate": 1e-06,
      "loss": -0.028,
      "num_tokens": 686482749.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.15112948417663574,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 1251
    },
    {
      "clip_ratio/high_max": 0.001680741032032529,
      "clip_ratio/high_mean": 0.0006303282416411093,
      "clip_ratio/low_mean": 0.0002803527818286966,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009106810484809102,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3023.0,
      "completions/mean_length": 853.2388916015625,
      "completions/mean_terminated_length": 513.36865234375,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 11.699708454810496,
      "grad_norm": 0.3022686243057251,
      "learning_rate": 1e-06,
      "loss": -0.0653,
      "num_tokens": 686995531.0,
      "reward": 0.723214328289032,
      "reward_std": 0.12952165305614471,
      "rewards/verify_math_reward/mean": 0.7232142686843872,
      "rewards/verify_math_reward/std": 0.44765952229499817,
      "step": 1252
    },
    {
      "clip_ratio/high_max": 0.0016854898094607051,
      "clip_ratio/high_mean": 0.0006491483527497621,
      "clip_ratio/low_mean": 0.0002343011179846144,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008834494910843205,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3098.0,
      "completions/mean_length": 949.6585083007812,
      "completions/mean_terminated_length": 563.2656860351562,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 11.709037900874636,
      "grad_norm": 0.17894645035266876,
      "learning_rate": 1e-06,
      "loss": -0.0739,
      "num_tokens": 687531945.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.13361947238445282,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 1253
    },
    {
      "clip_ratio/high_max": 0.0015656596842745785,
      "clip_ratio/high_mean": 0.0005110302818138734,
      "clip_ratio/low_mean": 0.0003575194359655143,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008685497214173665,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3553.0,
      "completions/mean_length": 1021.6339721679688,
      "completions/mean_terminated_length": 541.6361694335938,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 11.718367346938775,
      "grad_norm": 1.2602882385253906,
      "learning_rate": 1e-06,
      "loss": -0.0486,
      "num_tokens": 688041305.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.11614587157964706,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 1254
    },
    {
      "clip_ratio/high_max": 0.0017286953152506612,
      "clip_ratio/high_mean": 0.0005943849428149406,
      "clip_ratio/low_mean": 0.0003327131453261245,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009270980845030863,
      "completions/clipped_ratio": 0.1428571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3004.0,
      "completions/mean_length": 1068.630615234375,
      "completions/mean_terminated_length": 564.0690307617188,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 11.727696793002915,
      "grad_norm": 0.2530302107334137,
      "learning_rate": 1e-06,
      "loss": -0.0692,
      "num_tokens": 688569622.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.14142926037311554,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 1255
    },
    {
      "clip_ratio/high_max": 0.001845830185629893,
      "clip_ratio/high_mean": 0.0006954295513423858,
      "clip_ratio/low_mean": 0.00024846552241797326,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009438950601179386,
      "completions/clipped_ratio": 0.1272321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2203.0,
      "completions/mean_length": 979.6160888671875,
      "completions/mean_terminated_length": 525.3094482421875,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 11.737026239067056,
      "grad_norm": 0.45033928751945496,
      "learning_rate": 1e-06,
      "loss": -0.0707,
      "num_tokens": 689072446.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.12474842369556427,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 1256
    },
    {
      "clip_ratio/high_max": 0.0014097109124122653,
      "clip_ratio/high_mean": 0.000530242428794736,
      "clip_ratio/low_mean": 0.00046636186380055733,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009966042889573146,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2693.0,
      "completions/mean_length": 971.5513916015625,
      "completions/mean_terminated_length": 579.0326538085938,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 11.746355685131196,
      "grad_norm": 0.343485027551651,
      "learning_rate": 1e-06,
      "loss": -0.0321,
      "num_tokens": 689624204.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.13159321248531342,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 1257
    },
    {
      "clip_ratio/high_max": 0.001955183852260234,
      "clip_ratio/high_mean": 0.0006714449082210194,
      "clip_ratio/low_mean": 0.00032876396107894834,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010002088820328936,
      "completions/clipped_ratio": 0.1595982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3088.0,
      "completions/mean_length": 1104.8984375,
      "completions/mean_terminated_length": 536.8671875,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 11.755685131195335,
      "grad_norm": 0.2291107475757599,
      "learning_rate": 1e-06,
      "loss": -0.0566,
      "num_tokens": 690123401.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.12407512217760086,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.4909749925136566,
      "step": 1258
    },
    {
      "clip_ratio/high_max": 0.002204755186539842,
      "clip_ratio/high_mean": 0.0006613614950765623,
      "clip_ratio/low_mean": 0.000552472462914011,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012138339479861315,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4074.0,
      "completions/mean_length": 982.8013916015625,
      "completions/mean_terminated_length": 542.591064453125,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 11.765014577259475,
      "grad_norm": 0.7464296221733093,
      "learning_rate": 1e-06,
      "loss": -0.0561,
      "num_tokens": 690640767.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.15244035422801971,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 1259
    },
    {
      "clip_ratio/high_max": 0.0015923376358841779,
      "clip_ratio/high_mean": 0.0005037416613049572,
      "clip_ratio/low_mean": 0.00041081587005464826,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009145575368165737,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3388.0,
      "completions/mean_length": 765.8739013671875,
      "completions/mean_terminated_length": 514.0155639648438,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 11.774344023323614,
      "grad_norm": 0.33046114444732666,
      "learning_rate": 1e-06,
      "loss": -0.0238,
      "num_tokens": 691153182.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.1176094189286232,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 1260
    },
    {
      "clip_ratio/high_max": 0.0019095545285381377,
      "clip_ratio/high_mean": 0.000722666032743291,
      "clip_ratio/low_mean": 0.0004577948702717549,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00118046090574353,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2437.0,
      "completions/mean_length": 763.935302734375,
      "completions/mean_terminated_length": 494.6369323730469,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 11.783673469387756,
      "grad_norm": 0.48128291964530945,
      "learning_rate": 1e-06,
      "loss": -0.0271,
      "num_tokens": 691646292.0,
      "reward": 0.7109375596046448,
      "reward_std": 0.15439637005329132,
      "rewards/verify_math_reward/mean": 0.7109375,
      "rewards/verify_math_reward/std": 0.45358020067214966,
      "step": 1261
    },
    {
      "clip_ratio/high_max": 0.0018459937164152507,
      "clip_ratio/high_mean": 0.0006099835327404435,
      "clip_ratio/low_mean": 0.00023658017335037584,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008465637074550614,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2382.0,
      "completions/mean_length": 926.06591796875,
      "completions/mean_terminated_length": 505.27813720703125,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 11.793002915451895,
      "grad_norm": 0.2681536078453064,
      "learning_rate": 1e-06,
      "loss": -0.0702,
      "num_tokens": 692132271.0,
      "reward": 0.699776828289032,
      "reward_std": 0.13233955204486847,
      "rewards/verify_math_reward/mean": 0.6997767686843872,
      "rewards/verify_math_reward/std": 0.4586108922958374,
      "step": 1262
    },
    {
      "clip_ratio/high_max": 0.0020512671435426455,
      "clip_ratio/high_mean": 0.0007380642273346893,
      "clip_ratio/low_mean": 0.0005563448394241277,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012944090449309442,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4067.0,
      "completions/mean_length": 1052.86279296875,
      "completions/mean_terminated_length": 577.7406616210938,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 11.802332361516035,
      "grad_norm": 0.269562691450119,
      "learning_rate": 1e-06,
      "loss": -0.0302,
      "num_tokens": 692665740.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.15356972813606262,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 1263
    },
    {
      "clip_ratio/high_max": 0.0021905639296164736,
      "clip_ratio/high_mean": 0.0007840562648198102,
      "clip_ratio/low_mean": 0.00043938425301348616,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012234405403432902,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3016.0,
      "completions/mean_length": 964.1864013671875,
      "completions/mean_terminated_length": 534.9530639648438,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 11.811661807580174,
      "grad_norm": 0.29455795884132385,
      "learning_rate": 1e-06,
      "loss": -0.0888,
      "num_tokens": 693174363.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.15064051747322083,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422141790390015,
      "step": 1264
    },
    {
      "clip_ratio/high_max": 0.0012735659529425902,
      "clip_ratio/high_mean": 0.0003974678108988883,
      "clip_ratio/low_mean": 0.0003051622916245833,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007026301009318558,
      "completions/clipped_ratio": 0.1651785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2422.0,
      "completions/mean_length": 1124.55029296875,
      "completions/mean_terminated_length": 536.6163330078125,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 11.820991253644316,
      "grad_norm": 0.2692880630493164,
      "learning_rate": 1e-06,
      "loss": -0.0578,
      "num_tokens": 693664416.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.10562442243099213,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 1265
    },
    {
      "clip_ratio/high_max": 0.0020158496881776955,
      "clip_ratio/high_mean": 0.0006687959939881694,
      "clip_ratio/low_mean": 0.0004615484576788731,
      "clip_ratio/low_min": 1.9635563148767687e-05,
      "clip_ratio/region_mean": 0.001130344440753106,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3928.0,
      "completions/mean_length": 1009.591552734375,
      "completions/mean_terminated_length": 573.16943359375,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 11.830320699708455,
      "grad_norm": 0.2503564655780792,
      "learning_rate": 1e-06,
      "loss": -0.0615,
      "num_tokens": 694197394.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.16093555092811584,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 1266
    },
    {
      "clip_ratio/high_max": 0.0014823563906247728,
      "clip_ratio/high_mean": 0.0005556269861699548,
      "clip_ratio/low_mean": 0.0004243018215674965,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009799288218346192,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2915.0,
      "completions/mean_length": 956.130615234375,
      "completions/mean_terminated_length": 548.3038940429688,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 11.839650145772595,
      "grad_norm": 1.6916627883911133,
      "learning_rate": 1e-06,
      "loss": -0.0212,
      "num_tokens": 694724591.0,
      "reward": 0.6595982313156128,
      "reward_std": 0.12223179638385773,
      "rewards/verify_math_reward/mean": 0.6595982313156128,
      "rewards/verify_math_reward/std": 0.4741089344024658,
      "step": 1267
    },
    {
      "clip_ratio/high_max": 0.002956606214866042,
      "clip_ratio/high_mean": 0.0009120819522649981,
      "clip_ratio/low_mean": 0.0005626539095828775,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014747358582098968,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2721.0,
      "completions/mean_length": 924.4553833007812,
      "completions/mean_terminated_length": 517.0277099609375,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 11.848979591836734,
      "grad_norm": 0.2784743905067444,
      "learning_rate": 1e-06,
      "loss": -0.0509,
      "num_tokens": 695238159.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.13914084434509277,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 1268
    },
    {
      "clip_ratio/high_max": 0.0021453729314089287,
      "clip_ratio/high_mean": 0.0007798439692123793,
      "clip_ratio/low_mean": 0.00044218389143679815,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001222027865878772,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2421.0,
      "completions/mean_length": 854.2511596679688,
      "completions/mean_terminated_length": 510.0654602050781,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 11.858309037900874,
      "grad_norm": 0.39456063508987427,
      "learning_rate": 1e-06,
      "loss": -0.0231,
      "num_tokens": 695739280.0,
      "reward": 0.637276828289032,
      "reward_std": 0.1486467719078064,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 1269
    },
    {
      "clip_ratio/high_max": 0.001790165355487261,
      "clip_ratio/high_mean": 0.0006870819424875663,
      "clip_ratio/low_mean": 0.00032956657150862156,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010166485153604299,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3141.0,
      "completions/mean_length": 889.1663208007812,
      "completions/mean_terminated_length": 517.7645874023438,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 11.867638483965015,
      "grad_norm": 0.5777519941329956,
      "learning_rate": 1e-06,
      "loss": -0.0527,
      "num_tokens": 696246581.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.13069167733192444,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 1270
    },
    {
      "clip_ratio/high_max": 0.0021682983788195997,
      "clip_ratio/high_mean": 0.0007505781686631963,
      "clip_ratio/low_mean": 0.00047273177187889814,
      "clip_ratio/low_min": 1.694685488473624e-05,
      "clip_ratio/region_mean": 0.001223309966007946,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4037.0,
      "completions/mean_length": 896.607177734375,
      "completions/mean_terminated_length": 503.6992492675781,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 11.876967930029155,
      "grad_norm": 0.24833278357982635,
      "learning_rate": 1e-06,
      "loss": -0.0271,
      "num_tokens": 696744933.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.12464035302400589,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.4610042870044708,
      "step": 1271
    },
    {
      "clip_ratio/high_max": 0.002244793126010336,
      "clip_ratio/high_mean": 0.0007893917318142485,
      "clip_ratio/low_mean": 0.00043416947755758883,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012235612084623426,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3543.0,
      "completions/mean_length": 997.3404541015625,
      "completions/mean_terminated_length": 531.94482421875,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 11.886297376093294,
      "grad_norm": 0.33796581625938416,
      "learning_rate": 1e-06,
      "loss": -0.0663,
      "num_tokens": 697254414.0,
      "reward": 0.621651828289032,
      "reward_std": 0.1480829268693924,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 1272
    },
    {
      "clip_ratio/high_max": 0.0017303563763562124,
      "clip_ratio/high_mean": 0.0005526193217519904,
      "clip_ratio/low_mean": 0.0003969038589275442,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009495231497567147,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3646.0,
      "completions/mean_length": 1010.5313110351562,
      "completions/mean_terminated_length": 551.6666870117188,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 11.895626822157434,
      "grad_norm": 0.29500406980514526,
      "learning_rate": 1e-06,
      "loss": -0.0613,
      "num_tokens": 697773394.0,
      "reward": 0.6328125,
      "reward_std": 0.11547327041625977,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 1273
    },
    {
      "clip_ratio/high_max": 0.001746648660628125,
      "clip_ratio/high_mean": 0.0005820595306431642,
      "clip_ratio/low_mean": 0.0003811074420809746,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009631669599912129,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2596.0,
      "completions/mean_length": 974.8694458007812,
      "completions/mean_terminated_length": 565.0239868164062,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 11.904956268221575,
      "grad_norm": 0.25987085700035095,
      "learning_rate": 1e-06,
      "loss": -0.0483,
      "num_tokens": 698310221.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.14627917110919952,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 1274
    },
    {
      "clip_ratio/high_max": 0.001714388436084846,
      "clip_ratio/high_mean": 0.000591999774769647,
      "clip_ratio/low_mean": 0.0003437696518631128,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009357694325444754,
      "completions/clipped_ratio": 0.1473214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3182.0,
      "completions/mean_length": 1107.009033203125,
      "completions/mean_terminated_length": 590.5863647460938,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 11.914285714285715,
      "grad_norm": 0.227379709482193,
      "learning_rate": 1e-06,
      "loss": -0.0663,
      "num_tokens": 698852605.0,
      "reward": 0.6328125,
      "reward_std": 0.13606080412864685,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 1275
    },
    {
      "clip_ratio/high_max": 0.002196255649323575,
      "clip_ratio/high_mean": 0.0007929484127089381,
      "clip_ratio/low_mean": 0.0003965649657402537,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001189513372082729,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2959.0,
      "completions/mean_length": 918.0491333007812,
      "completions/mean_terminated_length": 527.7744140625,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 11.923615160349854,
      "grad_norm": 0.2657451629638672,
      "learning_rate": 1e-06,
      "loss": -0.0411,
      "num_tokens": 699365449.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.1429387927055359,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 1276
    },
    {
      "clip_ratio/high_max": 0.001362100110782194,
      "clip_ratio/high_mean": 0.0005375641376303975,
      "clip_ratio/low_mean": 0.00043177891700452165,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009693430529296165,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3470.0,
      "completions/mean_length": 945.3717041015625,
      "completions/mean_terminated_length": 540.6309814453125,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 11.932944606413994,
      "grad_norm": 0.22986739873886108,
      "learning_rate": 1e-06,
      "loss": -0.0734,
      "num_tokens": 699885182.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.1504889577627182,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1277
    },
    {
      "clip_ratio/high_max": 0.0015836131751711946,
      "clip_ratio/high_mean": 0.000602021920713014,
      "clip_ratio/low_mean": 0.0004741516077046981,
      "clip_ratio/low_min": 3.965853011322906e-05,
      "clip_ratio/region_mean": 0.0010761735211417545,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2660.0,
      "completions/mean_length": 1021.411865234375,
      "completions/mean_terminated_length": 550.5289916992188,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 11.942274052478133,
      "grad_norm": 0.2524794042110443,
      "learning_rate": 1e-06,
      "loss": -0.0507,
      "num_tokens": 700396959.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.15973204374313354,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 1278
    },
    {
      "clip_ratio/high_max": 0.0017062622755474877,
      "clip_ratio/high_mean": 0.0006627615730394609,
      "clip_ratio/low_mean": 0.00026853110784941236,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009312926595157478,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3930.0,
      "completions/mean_length": 936.6395263671875,
      "completions/mean_terminated_length": 557.5162353515625,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 11.951603498542275,
      "grad_norm": 0.2874181270599365,
      "learning_rate": 1e-06,
      "loss": -0.0453,
      "num_tokens": 700924588.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.125994011759758,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 1279
    },
    {
      "clip_ratio/high_max": 0.002430144875688711,
      "clip_ratio/high_mean": 0.0007032658213574905,
      "clip_ratio/low_mean": 0.0003796259118189482,
      "clip_ratio/low_min": 2.031859548878856e-05,
      "clip_ratio/region_mean": 0.0010828917202161392,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2481.0,
      "completions/mean_length": 949.2489013671875,
      "completions/mean_terminated_length": 513.4218139648438,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 11.960932944606414,
      "grad_norm": 0.24304766952991486,
      "learning_rate": 1e-06,
      "loss": -0.052,
      "num_tokens": 701422435.0,
      "reward": 0.6975446939468384,
      "reward_std": 0.1239977478981018,
      "rewards/verify_math_reward/mean": 0.6975446343421936,
      "rewards/verify_math_reward/std": 0.45957791805267334,
      "step": 1280
    },
    {
      "clip_ratio/high_max": 0.0028999966161791235,
      "clip_ratio/high_mean": 0.0009244636557923513,
      "clip_ratio/low_mean": 0.0004252248681950732,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013496885294443928,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3781.0,
      "completions/mean_length": 1000.8460083007812,
      "completions/mean_terminated_length": 526.8134155273438,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 11.970262390670554,
      "grad_norm": 1.5346214771270752,
      "learning_rate": 1e-06,
      "loss": -0.0468,
      "num_tokens": 701923937.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.14699704945087433,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 1281
    },
    {
      "clip_ratio/high_max": 0.0022753454104531556,
      "clip_ratio/high_mean": 0.000832652862300165,
      "clip_ratio/low_mean": 0.00048645421611581696,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013191070684115402,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3859.0,
      "completions/mean_length": 932.9777221679688,
      "completions/mean_terminated_length": 544.5363159179688,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 11.979591836734693,
      "grad_norm": 0.28128111362457275,
      "learning_rate": 1e-06,
      "loss": -0.0438,
      "num_tokens": 702452005.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.16251038014888763,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111123085022,
      "step": 1282
    },
    {
      "clip_ratio/high_max": 0.002333375727175735,
      "clip_ratio/high_mean": 0.0008855625001160661,
      "clip_ratio/low_mean": 0.00031522811423201347,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012007906225335319,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3981.0,
      "completions/mean_length": 975.5803833007812,
      "completions/mean_terminated_length": 574.7203979492188,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 11.988921282798835,
      "grad_norm": 2.9312286376953125,
      "learning_rate": 1e-06,
      "loss": -0.0559,
      "num_tokens": 703001525.0,
      "reward": 0.613839328289032,
      "reward_std": 0.1521807461977005,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 1283
    },
    {
      "clip_ratio/high_max": 0.0018744780209090095,
      "clip_ratio/high_mean": 0.000724603861272044,
      "clip_ratio/low_mean": 0.00047104938903430593,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001195653254399076,
      "completions/clipped_ratio": 0.12784090909090906,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2255.0,
      "completions/mean_length": 983.571044921875,
      "completions/mean_terminated_length": 527.3517456054688,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 11.998250728862974,
      "grad_norm": 0.23256705701351166,
      "learning_rate": 1e-06,
      "loss": -0.0778,
      "num_tokens": 703512836.0,
      "reward": 0.6953125596046448,
      "reward_std": 0.15146788954734802,
      "rewards/verify_math_reward/mean": 0.6953125,
      "rewards/verify_math_reward/std": 0.4605320394039154,
      "step": 1284
    },
    {
      "clip_ratio/high_max": 0.002448100822221022,
      "clip_ratio/high_mean": 0.0007052671644487418,
      "clip_ratio/low_mean": 0.00039358966751024127,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010988568064931314,
      "completions/clipped_ratio": 0.1450892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2148.0,
      "completions/mean_length": 1018.099365234375,
      "completions/mean_terminated_length": 495.7402038574219,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 12.00932944606414,
      "grad_norm": 0.2784478962421417,
      "learning_rate": 1e-06,
      "loss": -0.0638,
      "num_tokens": 703980069.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.13722126185894012,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 1285
    },
    {
      "clip_ratio/high_max": 0.002386544922046596,
      "clip_ratio/high_mean": 0.0008257443287220667,
      "clip_ratio/low_mean": 0.00044577935909728694,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012715236880467273,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2701.0,
      "completions/mean_length": 881.3248291015625,
      "completions/mean_terminated_length": 500.05865478515625,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 12.018658892128279,
      "grad_norm": 0.24729658663272858,
      "learning_rate": 1e-06,
      "loss": -0.0523,
      "num_tokens": 704468696.0,
      "reward": 0.652901828289032,
      "reward_std": 0.12633031606674194,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631320357322693,
      "step": 1286
    },
    {
      "clip_ratio/high_max": 0.0015934536331769777,
      "clip_ratio/high_mean": 0.0006365084691424272,
      "clip_ratio/low_mean": 0.00045211950282464386,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001088627970602829,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3354.0,
      "completions/mean_length": 846.5892944335938,
      "completions/mean_terminated_length": 514.8536376953125,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 12.02798833819242,
      "grad_norm": 0.26070430874824524,
      "learning_rate": 1e-06,
      "loss": -0.0342,
      "num_tokens": 704978640.0,
      "reward": 0.7020089626312256,
      "reward_std": 0.14383850991725922,
      "rewards/verify_math_reward/mean": 0.7020089030265808,
      "rewards/verify_math_reward/std": 0.45763099193573,
      "step": 1287
    },
    {
      "clip_ratio/high_max": 0.001844758658990031,
      "clip_ratio/high_mean": 0.0006172262137624784,
      "clip_ratio/low_mean": 0.0002482873160261079,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008655135297885863,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2534.0,
      "completions/mean_length": 855.1094360351562,
      "completions/mean_terminated_length": 506.58343505859375,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 12.03731778425656,
      "grad_norm": 0.5442487001419067,
      "learning_rate": 1e-06,
      "loss": -0.0242,
      "num_tokens": 705476786.0,
      "reward": 0.7299107313156128,
      "reward_std": 0.11930371820926666,
      "rewards/verify_math_reward/mean": 0.7299107313156128,
      "rewards/verify_math_reward/std": 0.44425371289253235,
      "step": 1288
    },
    {
      "clip_ratio/high_max": 0.002029318577115191,
      "clip_ratio/high_mean": 0.0007828341404092498,
      "clip_ratio/low_mean": 0.00040049768631433835,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011833318276330829,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3228.0,
      "completions/mean_length": 845.677490234375,
      "completions/mean_terminated_length": 522.6392822265625,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 12.0466472303207,
      "grad_norm": 23153.896484375,
      "learning_rate": 1e-06,
      "loss": 0.3239,
      "num_tokens": 705980673.0,
      "reward": 0.7433035969734192,
      "reward_std": 0.1471051424741745,
      "rewards/verify_math_reward/mean": 0.7433035969734192,
      "rewards/verify_math_reward/std": 0.43705442547798157,
      "step": 1289
    },
    {
      "clip_ratio/high_max": 0.002059018261206802,
      "clip_ratio/high_mean": 0.0006915239491718239,
      "clip_ratio/low_mean": 0.0004089122239747667,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011004361895174952,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3849.0,
      "completions/mean_length": 989.8058471679688,
      "completions/mean_terminated_length": 504.8387145996094,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 12.055976676384839,
      "grad_norm": 0.7496042251586914,
      "learning_rate": 1e-06,
      "loss": -0.0306,
      "num_tokens": 706458515.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.14594538509845734,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1290
    },
    {
      "clip_ratio/high_max": 0.0014425257886614418,
      "clip_ratio/high_mean": 0.0004379914612400171,
      "clip_ratio/low_mean": 0.00033467197226855205,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007726634412392741,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3401.0,
      "completions/mean_length": 841.7210083007812,
      "completions/mean_terminated_length": 522.6740112304688,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 12.06530612244898,
      "grad_norm": 1.2562134265899658,
      "learning_rate": 1e-06,
      "loss": -0.0185,
      "num_tokens": 706975625.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.11471623182296753,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975659370422363,
      "step": 1291
    },
    {
      "clip_ratio/high_max": 0.0021811319966218434,
      "clip_ratio/high_mean": 0.0007741660901956493,
      "clip_ratio/low_mean": 0.00045813859105692245,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012323047012614552,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3910.0,
      "completions/mean_length": 1005.6864013671875,
      "completions/mean_terminated_length": 518.5827026367188,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 12.07463556851312,
      "grad_norm": 0.6910886764526367,
      "learning_rate": 1e-06,
      "loss": -0.06,
      "num_tokens": 707471544.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.15977439284324646,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 1292
    },
    {
      "clip_ratio/high_max": 0.0014795328643231187,
      "clip_ratio/high_mean": 0.000543700685739168,
      "clip_ratio/low_mean": 0.0005049879619036801,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010486886239959858,
      "completions/clipped_ratio": 0.1272321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3337.0,
      "completions/mean_length": 1017.1942138671875,
      "completions/mean_terminated_length": 568.36572265625,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 12.08396501457726,
      "grad_norm": 0.26466211676597595,
      "learning_rate": 1e-06,
      "loss": -0.0361,
      "num_tokens": 708006262.0,
      "reward": 0.6484375,
      "reward_std": 0.1283929944038391,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 1293
    },
    {
      "clip_ratio/high_max": 0.0018227593245683238,
      "clip_ratio/high_mean": 0.0005954590096735046,
      "clip_ratio/low_mean": 0.0006368893909893814,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012323484115768224,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2663.0,
      "completions/mean_length": 1029.587158203125,
      "completions/mean_terminated_length": 537.0543823242188,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 12.093294460641399,
      "grad_norm": 1.189442753791809,
      "learning_rate": 1e-06,
      "loss": -0.0521,
      "num_tokens": 708515316.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.13827574253082275,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.4816865026950836,
      "step": 1294
    },
    {
      "clip_ratio/high_max": 0.0022230715621844865,
      "clip_ratio/high_mean": 0.0008959492315625539,
      "clip_ratio/low_mean": 0.00033243598750232195,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012283852156542707,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3801.0,
      "completions/mean_length": 978.7422485351562,
      "completions/mean_terminated_length": 560.4772338867188,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 12.102623906705539,
      "grad_norm": 0.2985447347164154,
      "learning_rate": 1e-06,
      "loss": -0.0674,
      "num_tokens": 709050797.0,
      "reward": 0.652901828289032,
      "reward_std": 0.14263640344142914,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631317377090454,
      "step": 1295
    },
    {
      "clip_ratio/high_max": 0.0015567751815979136,
      "clip_ratio/high_mean": 0.0005009084907214856,
      "clip_ratio/low_mean": 0.00029084627249176265,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007917547663964797,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3497.0,
      "completions/mean_length": 861.9866333007812,
      "completions/mean_terminated_length": 523.0332641601562,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 12.11195335276968,
      "grad_norm": 2.3686461448669434,
      "learning_rate": 1e-06,
      "loss": -0.0186,
      "num_tokens": 709558057.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.1011870950460434,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219157218933105,
      "step": 1296
    },
    {
      "clip_ratio/high_max": 0.002217165849287994,
      "clip_ratio/high_mean": 0.0008089675575320143,
      "clip_ratio/low_mean": 0.00031538944858766627,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011243570315855322,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2325.0,
      "completions/mean_length": 886.3538208007812,
      "completions/mean_terminated_length": 545.5765380859375,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 12.12128279883382,
      "grad_norm": 0.21736620366573334,
      "learning_rate": 1e-06,
      "loss": -0.0747,
      "num_tokens": 710086078.0,
      "reward": 0.7087053656578064,
      "reward_std": 0.14376294612884521,
      "rewards/verify_math_reward/mean": 0.7087053656578064,
      "rewards/verify_math_reward/std": 0.45461276173591614,
      "step": 1297
    },
    {
      "clip_ratio/high_max": 0.0017939078898052685,
      "clip_ratio/high_mean": 0.0007152509715524502,
      "clip_ratio/low_mean": 0.0003571505853869894,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010724015628511552,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3385.0,
      "completions/mean_length": 883.9263916015625,
      "completions/mean_terminated_length": 569.0171508789062,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 12.130612244897959,
      "grad_norm": 0.20817220211029053,
      "learning_rate": 1e-06,
      "loss": -0.0344,
      "num_tokens": 710643932.0,
      "reward": 0.6328125,
      "reward_std": 0.12948885560035706,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 1298
    },
    {
      "clip_ratio/high_max": 0.001732151722535491,
      "clip_ratio/high_mean": 0.0006494188701253734,
      "clip_ratio/low_mean": 0.0003305515740521514,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009799704366741935,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2872.0,
      "completions/mean_length": 825.8717041015625,
      "completions/mean_terminated_length": 514.0501708984375,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 12.139941690962099,
      "grad_norm": 0.22387662529945374,
      "learning_rate": 1e-06,
      "loss": -0.0497,
      "num_tokens": 711149745.0,
      "reward": 0.7042410969734192,
      "reward_std": 0.12245932221412659,
      "rewards/verify_math_reward/mean": 0.7042410969734192,
      "rewards/verify_math_reward/std": 0.45663803815841675,
      "step": 1299
    },
    {
      "clip_ratio/high_max": 0.002410630557278637,
      "clip_ratio/high_mean": 0.0008133751998684602,
      "clip_ratio/low_mean": 0.00035388790547585813,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011672631117107812,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3713.0,
      "completions/mean_length": 1056.325927734375,
      "completions/mean_terminated_length": 577.2041625976562,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 12.14927113702624,
      "grad_norm": 0.34515145421028137,
      "learning_rate": 1e-06,
      "loss": -0.0469,
      "num_tokens": 711696493.0,
      "reward": 0.640625,
      "reward_std": 0.16003261506557465,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 1300
    },
    {
      "clip_ratio/high_max": 0.0015739901791675948,
      "clip_ratio/high_mean": 0.0005282271149553708,
      "clip_ratio/low_mean": 0.0003222537222882238,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008504808429279365,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2900.0,
      "completions/mean_length": 816.1295166015625,
      "completions/mean_terminated_length": 516.5067138671875,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 12.15860058309038,
      "grad_norm": 0.24931249022483826,
      "learning_rate": 1e-06,
      "loss": -0.0334,
      "num_tokens": 712205073.0,
      "reward": 0.7209821939468384,
      "reward_std": 0.11257727444171906,
      "rewards/verify_math_reward/mean": 0.7209821343421936,
      "rewards/verify_math_reward/std": 0.448766827583313,
      "step": 1301
    },
    {
      "clip_ratio/high_max": 0.0016901142698770855,
      "clip_ratio/high_mean": 0.0005469306806844543,
      "clip_ratio/low_mean": 0.00030967633620093693,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000856607028254075,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3696.0,
      "completions/mean_length": 916.333740234375,
      "completions/mean_terminated_length": 543.6546020507812,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 12.167930029154519,
      "grad_norm": 0.24502603709697723,
      "learning_rate": 1e-06,
      "loss": -0.0511,
      "num_tokens": 712731524.0,
      "reward": 0.6484375,
      "reward_std": 0.11858581006526947,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 1302
    },
    {
      "clip_ratio/high_max": 0.0014539215371769387,
      "clip_ratio/high_mean": 0.000498499149671261,
      "clip_ratio/low_mean": 0.0003480606339962833,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008465597966278438,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2060.0,
      "completions/mean_length": 913.7567138671875,
      "completions/mean_terminated_length": 500.4262390136719,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 12.177259475218658,
      "grad_norm": 0.22843466699123383,
      "learning_rate": 1e-06,
      "loss": -0.0434,
      "num_tokens": 713211074.0,
      "reward": 0.7198660969734192,
      "reward_std": 0.12110385298728943,
      "rewards/verify_math_reward/mean": 0.7198660969734192,
      "rewards/verify_math_reward/std": 0.44931530952453613,
      "step": 1303
    },
    {
      "clip_ratio/high_max": 0.0018457827500242274,
      "clip_ratio/high_mean": 0.0005627900591207435,
      "clip_ratio/low_mean": 0.00025844126093943487,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008212313186959364,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2341.0,
      "completions/mean_length": 800.4721069335938,
      "completions/mean_terminated_length": 477.3811340332031,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 12.186588921282798,
      "grad_norm": 0.19199474155902863,
      "learning_rate": 1e-06,
      "loss": -0.0335,
      "num_tokens": 713680297.0,
      "reward": 0.699776828289032,
      "reward_std": 0.08837654441595078,
      "rewards/verify_math_reward/mean": 0.6997767686843872,
      "rewards/verify_math_reward/std": 0.4586109220981598,
      "step": 1304
    },
    {
      "clip_ratio/high_max": 0.0011734882300515892,
      "clip_ratio/high_mean": 0.0003831462172456668,
      "clip_ratio/low_mean": 0.00027823038726637606,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006613765990550746,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2768.0,
      "completions/mean_length": 926.8370971679688,
      "completions/mean_terminated_length": 550.96875,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 12.19591836734694,
      "grad_norm": 0.20187468826770782,
      "learning_rate": 1e-06,
      "loss": -0.0491,
      "num_tokens": 714212319.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.12253419309854507,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 1305
    },
    {
      "clip_ratio/high_max": 0.0018866618229367305,
      "clip_ratio/high_mean": 0.0006295248540482135,
      "clip_ratio/low_mean": 0.00036837365564679203,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009978985372072202,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3804.0,
      "completions/mean_length": 909.1116333007812,
      "completions/mean_terminated_length": 526.6849975585938,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 12.205247813411079,
      "grad_norm": 0.2795391380786896,
      "learning_rate": 1e-06,
      "loss": -0.0475,
      "num_tokens": 714720035.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.12102828174829483,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 1306
    },
    {
      "clip_ratio/high_max": 0.0019569530631997623,
      "clip_ratio/high_mean": 0.000720999252735055,
      "clip_ratio/low_mean": 0.0003416577901589335,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010626570328895468,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2499.0,
      "completions/mean_length": 959.7500610351562,
      "completions/mean_terminated_length": 529.9086303710938,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 12.214577259475218,
      "grad_norm": 0.2649560570716858,
      "learning_rate": 1e-06,
      "loss": -0.0626,
      "num_tokens": 715226251.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.13970720767974854,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692258834839,
      "step": 1307
    },
    {
      "clip_ratio/high_max": 0.0020844952705374453,
      "clip_ratio/high_mean": 0.0007541122831753455,
      "clip_ratio/low_mean": 0.000370883740743011,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011249960371060297,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3845.0,
      "completions/mean_length": 1043.618408203125,
      "completions/mean_terminated_length": 567.0529174804688,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 12.223906705539358,
      "grad_norm": 0.2061583697795868,
      "learning_rate": 1e-06,
      "loss": -0.0744,
      "num_tokens": 715751605.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.13639894127845764,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 1308
    },
    {
      "clip_ratio/high_max": 0.0018267015984747559,
      "clip_ratio/high_mean": 0.0007149914308683947,
      "clip_ratio/low_mean": 0.00041626947313488927,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011312609058222733,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3141.0,
      "completions/mean_length": 1053.969970703125,
      "completions/mean_terminated_length": 551.5799560546875,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 12.2332361516035,
      "grad_norm": 8347.5732421875,
      "learning_rate": 1e-06,
      "loss": 0.4866,
      "num_tokens": 716262578.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.14203909039497375,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 1309
    },
    {
      "clip_ratio/high_max": 0.0015578833299514372,
      "clip_ratio/high_mean": 0.0006038658484612824,
      "clip_ratio/low_mean": 0.0004937596686431789,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010976255343848607,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1710.0,
      "completions/mean_length": 894.0625610351562,
      "completions/mean_terminated_length": 532.1043090820312,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 12.242565597667639,
      "grad_norm": 0.2176607847213745,
      "learning_rate": 1e-06,
      "loss": -0.0531,
      "num_tokens": 716778194.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.14748378098011017,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 1310
    },
    {
      "clip_ratio/high_max": 0.0018361847760388628,
      "clip_ratio/high_mean": 0.0006725578768964624,
      "clip_ratio/low_mean": 0.00047711110664749867,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011496689621708356,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3422.0,
      "completions/mean_length": 915.4420166015625,
      "completions/mean_terminated_length": 511.3710632324219,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 12.251895043731778,
      "grad_norm": 0.23621632158756256,
      "learning_rate": 1e-06,
      "loss": -0.0928,
      "num_tokens": 717265078.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.14868700504302979,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 1311
    },
    {
      "clip_ratio/high_max": 0.0017448979124310426,
      "clip_ratio/high_mean": 0.0005409900240920251,
      "clip_ratio/low_mean": 0.0003150496285115878,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008560396508983104,
      "completions/clipped_ratio": 0.15625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3250.0,
      "completions/mean_length": 1089.07373046875,
      "completions/mean_terminated_length": 532.2354125976562,
      "completions/min_length": 190.0,
      "completions/min_terminated_length": 190.0,
      "epoch": 12.261224489795918,
      "grad_norm": 0.26443490386009216,
      "learning_rate": 1e-06,
      "loss": -0.0521,
      "num_tokens": 717759792.0,
      "reward": 0.5703125,
      "reward_std": 0.13999709486961365,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 1312
    },
    {
      "clip_ratio/high_max": 0.0018752369069261476,
      "clip_ratio/high_mean": 0.000779215579314041,
      "clip_ratio/low_mean": 0.00040045686000667047,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011796724211308174,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2948.0,
      "completions/mean_length": 970.13623046875,
      "completions/mean_terminated_length": 568.5767822265625,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 12.270553935860057,
      "grad_norm": 0.2585813105106354,
      "learning_rate": 1e-06,
      "loss": -0.0613,
      "num_tokens": 718296866.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.1594713032245636,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 1313
    },
    {
      "clip_ratio/high_max": 0.0014747667009942234,
      "clip_ratio/high_mean": 0.0005256458553049015,
      "clip_ratio/low_mean": 0.00038528843151652836,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009109342699957779,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3269.0,
      "completions/mean_length": 828.2924194335938,
      "completions/mean_terminated_length": 534.1192016601562,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 12.279883381924199,
      "grad_norm": 0.2321818768978119,
      "learning_rate": 1e-06,
      "loss": -0.0275,
      "num_tokens": 718826944.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.11825203895568848,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 1314
    },
    {
      "clip_ratio/high_max": 0.0018210375856142491,
      "clip_ratio/high_mean": 0.0006717022824886953,
      "clip_ratio/low_mean": 0.0003300168152691185,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010017190434155054,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3987.0,
      "completions/mean_length": 797.3504638671875,
      "completions/mean_terminated_length": 509.1189270019531,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 12.289212827988338,
      "grad_norm": 0.2518939971923828,
      "learning_rate": 1e-06,
      "loss": -0.0467,
      "num_tokens": 719326674.0,
      "reward": 0.7254464626312256,
      "reward_std": 0.11779487878084183,
      "rewards/verify_math_reward/mean": 0.7254464030265808,
      "rewards/verify_math_reward/std": 0.4465382993221283,
      "step": 1315
    },
    {
      "clip_ratio/high_max": 0.001613775129953865,
      "clip_ratio/high_mean": 0.0006450811197282746,
      "clip_ratio/low_mean": 0.0003908105545633589,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010358916770201176,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1745.0,
      "completions/mean_length": 898.0670166015625,
      "completions/mean_terminated_length": 509.8323059082031,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 12.298542274052478,
      "grad_norm": 0.29463186860084534,
      "learning_rate": 1e-06,
      "loss": -0.0701,
      "num_tokens": 719826702.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.14432775974273682,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1316
    },
    {
      "clip_ratio/high_max": 0.0014518742937070783,
      "clip_ratio/high_mean": 0.0005386547600210179,
      "clip_ratio/low_mean": 0.00026829864145838656,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000806953390565468,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3851.0,
      "completions/mean_length": 846.0424194335938,
      "completions/mean_terminated_length": 566.34912109375,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 12.307871720116617,
      "grad_norm": 0.2016036957502365,
      "learning_rate": 1e-06,
      "loss": -0.0506,
      "num_tokens": 720381084.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.11114511638879776,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 1317
    },
    {
      "clip_ratio/high_max": 0.0024602458797744475,
      "clip_ratio/high_mean": 0.000720085497960099,
      "clip_ratio/low_mean": 0.0005095433743917965,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001229628876899369,
      "completions/clipped_ratio": 0.1484375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3203.0,
      "completions/mean_length": 1088.310302734375,
      "completions/mean_terminated_length": 564.0341186523438,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 12.317201166180759,
      "grad_norm": 0.2657475471496582,
      "learning_rate": 1e-06,
      "loss": -0.0317,
      "num_tokens": 720910994.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.14191961288452148,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 1318
    },
    {
      "clip_ratio/high_max": 0.0019393142938497476,
      "clip_ratio/high_mean": 0.0006565232779394137,
      "clip_ratio/low_mean": 0.00029207376746853697,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009485970495006768,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3425.0,
      "completions/mean_length": 825.8694458007812,
      "completions/mean_terminated_length": 514.0477294921875,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 12.326530612244898,
      "grad_norm": 0.24735401570796967,
      "learning_rate": 1e-06,
      "loss": -0.0395,
      "num_tokens": 721414773.0,
      "reward": 0.7187500596046448,
      "reward_std": 0.11899950355291367,
      "rewards/verify_math_reward/mean": 0.71875,
      "rewards/verify_math_reward/std": 0.4498603343963623,
      "step": 1319
    },
    {
      "clip_ratio/high_max": 0.0017042428044078406,
      "clip_ratio/high_mean": 0.0005626931406368385,
      "clip_ratio/low_mean": 0.0002088966343762877,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007715897627349477,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3585.0,
      "completions/mean_length": 996.0156860351562,
      "completions/mean_terminated_length": 597.7808227539062,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 12.335860058309038,
      "grad_norm": 0.18399116396903992,
      "learning_rate": 1e-06,
      "loss": -0.0307,
      "num_tokens": 721975811.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.10182830691337585,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 1320
    },
    {
      "clip_ratio/high_max": 0.0016912338433030527,
      "clip_ratio/high_mean": 0.0006079029899410671,
      "clip_ratio/low_mean": 0.0005140973971720086,
      "clip_ratio/low_min": 1.20586537377676e-05,
      "clip_ratio/region_mean": 0.0011220004053029697,
      "completions/clipped_ratio": 0.1272321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2808.0,
      "completions/mean_length": 1024.05810546875,
      "completions/mean_terminated_length": 576.2301635742188,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 12.345189504373177,
      "grad_norm": 0.21878719329833984,
      "learning_rate": 1e-06,
      "loss": -0.0403,
      "num_tokens": 722511047.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.14286141097545624,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 1321
    },
    {
      "clip_ratio/high_max": 0.0014700482706757612,
      "clip_ratio/high_mean": 0.0004403067352996004,
      "clip_ratio/low_mean": 0.00023282826759896125,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000673134985845536,
      "completions/clipped_ratio": 0.1428571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3186.0,
      "completions/mean_length": 1039.094970703125,
      "completions/mean_terminated_length": 529.6107177734375,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 12.354518950437317,
      "grad_norm": 0.21954764425754547,
      "learning_rate": 1e-06,
      "loss": -0.0477,
      "num_tokens": 723004596.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.10682723671197891,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 1322
    },
    {
      "clip_ratio/high_max": 0.0013260283303679898,
      "clip_ratio/high_mean": 0.00045830250110157067,
      "clip_ratio/low_mean": 0.00031886689885141095,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007771694072289392,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4087.0,
      "completions/mean_length": 974.0145263671875,
      "completions/mean_terminated_length": 532.561767578125,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 12.363848396501458,
      "grad_norm": 0.3715142011642456,
      "learning_rate": 1e-06,
      "loss": -0.0439,
      "num_tokens": 723509937.0,
      "reward": 0.6328125,
      "reward_std": 0.11738258600234985,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 1323
    },
    {
      "clip_ratio/high_max": 0.002065404762106482,
      "clip_ratio/high_mean": 0.0007164950420701643,
      "clip_ratio/low_mean": 0.0008087563928711461,
      "clip_ratio/low_min": 2.6260148842993658e-05,
      "clip_ratio/region_mean": 0.0015252514167514164,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3686.0,
      "completions/mean_length": 1027.399658203125,
      "completions/mean_terminated_length": 575.5570068359375,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 12.373177842565598,
      "grad_norm": 0.3467468321323395,
      "learning_rate": 1e-06,
      "loss": -0.0686,
      "num_tokens": 724055543.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.1622903198003769,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924395978450775,
      "step": 1324
    },
    {
      "clip_ratio/high_max": 0.0014852873864583671,
      "clip_ratio/high_mean": 0.000510515309542825,
      "clip_ratio/low_mean": 0.00048510939222978777,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009956246976798866,
      "completions/clipped_ratio": 0.1651785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3804.0,
      "completions/mean_length": 1146.51123046875,
      "completions/mean_terminated_length": 562.9224853515625,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 12.382507288629737,
      "grad_norm": 0.30605512857437134,
      "learning_rate": 1e-06,
      "loss": -0.0458,
      "num_tokens": 724562401.0,
      "reward": 0.5703125,
      "reward_std": 0.13305816054344177,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 1325
    },
    {
      "clip_ratio/high_max": 0.002095753065077588,
      "clip_ratio/high_mean": 0.0006541928869410185,
      "clip_ratio/low_mean": 0.00034604472557475674,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010002375929616392,
      "completions/clipped_ratio": 0.1372767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3523.0,
      "completions/mean_length": 1100.7567138671875,
      "completions/mean_terminated_length": 624.1526489257812,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 12.391836734693877,
      "grad_norm": 0.2333359271287918,
      "learning_rate": 1e-06,
      "loss": -0.0588,
      "num_tokens": 725137831.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.13873039186000824,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763264656067,
      "step": 1326
    },
    {
      "clip_ratio/high_max": 0.0017548011946928455,
      "clip_ratio/high_mean": 0.0004978831566404551,
      "clip_ratio/low_mean": 0.00042375341763545293,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000921636565180961,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4094.0,
      "completions/mean_length": 871.6908569335938,
      "completions/mean_terminated_length": 533.754638671875,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 12.401166180758018,
      "grad_norm": 0.3012625277042389,
      "learning_rate": 1e-06,
      "loss": -0.0407,
      "num_tokens": 725649330.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.11325128376483917,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 1327
    },
    {
      "clip_ratio/high_max": 0.002174711273255525,
      "clip_ratio/high_mean": 0.0007291964629985159,
      "clip_ratio/low_mean": 0.00045984219013917027,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011890386158484034,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3140.0,
      "completions/mean_length": 887.8605346679688,
      "completions/mean_terminated_length": 529.6315307617188,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 12.410495626822158,
      "grad_norm": 0.2796292304992676,
      "learning_rate": 1e-06,
      "loss": -0.0783,
      "num_tokens": 726165405.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.17705437541007996,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 1328
    },
    {
      "clip_ratio/high_max": 0.0018297717724635731,
      "clip_ratio/high_mean": 0.0006870367415103829,
      "clip_ratio/low_mean": 0.0003574270317585615,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010444637900945963,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2078.0,
      "completions/mean_length": 827.0870971679688,
      "completions/mean_terminated_length": 493.3603820800781,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 12.419825072886297,
      "grad_norm": 0.33825457096099854,
      "learning_rate": 1e-06,
      "loss": -0.046,
      "num_tokens": 726656691.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.13139888644218445,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 1329
    },
    {
      "clip_ratio/high_max": 0.0021442070792545564,
      "clip_ratio/high_mean": 0.0008315178347402252,
      "clip_ratio/low_mean": 0.00040918350123320124,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012407013527990784,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1688.0,
      "completions/mean_length": 955.5279541015625,
      "completions/mean_terminated_length": 520.5704956054688,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 12.429154518950437,
      "grad_norm": 0.596783459186554,
      "learning_rate": 1e-06,
      "loss": -0.0944,
      "num_tokens": 727167484.0,
      "reward": 0.6171875,
      "reward_std": 0.1715322583913803,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 1330
    },
    {
      "clip_ratio/high_max": 0.0018381874469923787,
      "clip_ratio/high_mean": 0.0007178178530011792,
      "clip_ratio/low_mean": 0.0003124983541056281,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010303161889169132,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3827.0,
      "completions/mean_length": 937.8460083007812,
      "completions/mean_terminated_length": 527.6444091796875,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 12.438483965014576,
      "grad_norm": 0.3436470329761505,
      "learning_rate": 1e-06,
      "loss": -0.0832,
      "num_tokens": 727673458.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.15834084153175354,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 1331
    },
    {
      "clip_ratio/high_max": 0.002104193546983879,
      "clip_ratio/high_mean": 0.000805018111350364,
      "clip_ratio/low_mean": 0.00025845954769465607,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001063477680872893,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1872.0,
      "completions/mean_length": 756.2154541015625,
      "completions/mean_terminated_length": 486.2931213378906,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 12.447813411078718,
      "grad_norm": 0.26644325256347656,
      "learning_rate": 1e-06,
      "loss": -0.0665,
      "num_tokens": 728162619.0,
      "reward": 0.7477678656578064,
      "reward_std": 0.14382894337177277,
      "rewards/verify_math_reward/mean": 0.7477678656578064,
      "rewards/verify_math_reward/std": 0.434536337852478,
      "step": 1332
    },
    {
      "clip_ratio/high_max": 0.0019071974747930653,
      "clip_ratio/high_mean": 0.0005061422020844475,
      "clip_ratio/low_mean": 0.0006438554846681654,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011499976844788762,
      "completions/clipped_ratio": 0.1395089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2670.0,
      "completions/mean_length": 1034.37060546875,
      "completions/mean_terminated_length": 537.9973754882812,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 12.457142857142857,
      "grad_norm": 0.3891693949699402,
      "learning_rate": 1e-06,
      "loss": -0.0516,
      "num_tokens": 728678415.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.1322993040084839,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 1333
    },
    {
      "clip_ratio/high_max": 0.0020731909971800633,
      "clip_ratio/high_mean": 0.0007368674869212555,
      "clip_ratio/low_mean": 0.00034514032768129255,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010820078059623484,
      "completions/clipped_ratio": 0.1395089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3718.0,
      "completions/mean_length": 1022.29248046875,
      "completions/mean_terminated_length": 523.9610595703125,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 12.466472303206997,
      "grad_norm": 0.4087883234024048,
      "learning_rate": 1e-06,
      "loss": -0.0774,
      "num_tokens": 729174453.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.14969663321971893,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159182548523,
      "step": 1334
    },
    {
      "clip_ratio/high_max": 0.0013565474546339829,
      "clip_ratio/high_mean": 0.0005092681140013156,
      "clip_ratio/low_mean": 0.00016760302048623998,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006768711373297265,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3007.0,
      "completions/mean_length": 721.6138916015625,
      "completions/mean_terminated_length": 496.65478515625,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 12.475801749271136,
      "grad_norm": 0.18541057407855988,
      "learning_rate": 1e-06,
      "loss": -0.0332,
      "num_tokens": 729672259.0,
      "reward": 0.723214328289032,
      "reward_std": 0.0914565846323967,
      "rewards/verify_math_reward/mean": 0.7232142686843872,
      "rewards/verify_math_reward/std": 0.44765952229499817,
      "step": 1335
    },
    {
      "clip_ratio/high_max": 0.0025216024623659905,
      "clip_ratio/high_mean": 0.000756388562876964,
      "clip_ratio/low_mean": 0.0003998155166300421,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011562040745047852,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2925.0,
      "completions/mean_length": 919.2388916015625,
      "completions/mean_terminated_length": 524.6348876953125,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 12.485131195335278,
      "grad_norm": 0.28288766741752625,
      "learning_rate": 1e-06,
      "loss": -0.0747,
      "num_tokens": 730185161.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.14124813675880432,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1336
    },
    {
      "clip_ratio/high_max": 0.001978162796149263,
      "clip_ratio/high_mean": 0.000796153801275068,
      "clip_ratio/low_mean": 0.0003990620625700103,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011952158638450783,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3891.0,
      "completions/mean_length": 880.2645263671875,
      "completions/mean_terminated_length": 530.035888671875,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 12.494460641399417,
      "grad_norm": 0.272204726934433,
      "learning_rate": 1e-06,
      "loss": -0.0238,
      "num_tokens": 730705782.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.13572561740875244,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 1337
    },
    {
      "clip_ratio/high_max": 0.0017550437987665646,
      "clip_ratio/high_mean": 0.0006655100432908512,
      "clip_ratio/low_mean": 0.00032973168163152877,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009952417258318746,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2684.0,
      "completions/mean_length": 893.2902221679688,
      "completions/mean_terminated_length": 540.079345703125,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 12.503790087463557,
      "grad_norm": 0.21157510578632355,
      "learning_rate": 1e-06,
      "loss": -0.0434,
      "num_tokens": 731231402.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.13260099291801453,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 1338
    },
    {
      "clip_ratio/high_max": 0.0019510428319335915,
      "clip_ratio/high_mean": 0.0007264954570018745,
      "clip_ratio/low_mean": 0.0005139458235134953,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012404412664182018,
      "completions/clipped_ratio": 0.1473214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2254.0,
      "completions/mean_length": 1069.1160888671875,
      "completions/mean_terminated_length": 546.1466064453125,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 12.513119533527696,
      "grad_norm": 0.2677256762981415,
      "learning_rate": 1e-06,
      "loss": -0.0351,
      "num_tokens": 731749626.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.13196228444576263,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159480571747,
      "step": 1339
    },
    {
      "clip_ratio/high_max": 0.0014662169305665884,
      "clip_ratio/high_mean": 0.0006078013284422923,
      "clip_ratio/low_mean": 0.00047037160948093515,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010781729142763652,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2787.0,
      "completions/mean_length": 921.1261596679688,
      "completions/mean_terminated_length": 570.9851684570312,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 12.522448979591836,
      "grad_norm": 0.24825072288513184,
      "learning_rate": 1e-06,
      "loss": -0.0333,
      "num_tokens": 732300379.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.16683784127235413,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909069061279297,
      "step": 1340
    },
    {
      "clip_ratio/high_max": 0.0018917612433142494,
      "clip_ratio/high_mean": 0.0005794450075882196,
      "clip_ratio/low_mean": 0.0005305540903464134,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011099991024821065,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3103.0,
      "completions/mean_length": 906.1428833007812,
      "completions/mean_terminated_length": 571.8175048828125,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 12.531778425655977,
      "grad_norm": 0.20584636926651,
      "learning_rate": 1e-06,
      "loss": -0.0265,
      "num_tokens": 732847419.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.12602904438972473,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 1341
    },
    {
      "clip_ratio/high_max": 0.002032488802797161,
      "clip_ratio/high_mean": 0.0006238512614800129,
      "clip_ratio/low_mean": 0.0004995815661459346,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011234328158025164,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3197.0,
      "completions/mean_length": 921.6328735351562,
      "completions/mean_terminated_length": 522.8429565429688,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 12.541107871720117,
      "grad_norm": 0.25520849227905273,
      "learning_rate": 1e-06,
      "loss": -0.0395,
      "num_tokens": 733352234.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.12471634149551392,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 1342
    },
    {
      "clip_ratio/high_max": 0.002320948951819446,
      "clip_ratio/high_mean": 0.0008796048095973674,
      "clip_ratio/low_mean": 0.00044332246670819586,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013229272626631428,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3588.0,
      "completions/mean_length": 855.5100708007812,
      "completions/mean_terminated_length": 546.5147094726562,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 12.550437317784256,
      "grad_norm": 0.24174198508262634,
      "learning_rate": 1e-06,
      "loss": -0.0354,
      "num_tokens": 733881731.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.15518662333488464,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 1343
    },
    {
      "clip_ratio/high_max": 0.002332564181415364,
      "clip_ratio/high_mean": 0.0009191828648908995,
      "clip_ratio/low_mean": 0.00034532180598034756,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012645046808756888,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2618.0,
      "completions/mean_length": 878.654052734375,
      "completions/mean_terminated_length": 514.9540405273438,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 12.559766763848396,
      "grad_norm": 0.24639609456062317,
      "learning_rate": 1e-06,
      "loss": -0.0531,
      "num_tokens": 734376733.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.1587231457233429,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1344
    },
    {
      "clip_ratio/high_max": 0.001727164286421612,
      "clip_ratio/high_mean": 0.0007320740969589679,
      "clip_ratio/low_mean": 0.0004977139587936108,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012297880348342005,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4017.0,
      "completions/mean_length": 929.91748046875,
      "completions/mean_terminated_length": 558.8304443359375,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 12.569096209912537,
      "grad_norm": 0.24643969535827637,
      "learning_rate": 1e-06,
      "loss": -0.0601,
      "num_tokens": 734918323.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.16506938636302948,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159182548523,
      "step": 1345
    },
    {
      "clip_ratio/high_max": 0.0017780049893190153,
      "clip_ratio/high_mean": 0.0006547151942868368,
      "clip_ratio/low_mean": 0.00029960907022541505,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009543242740619462,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2644.0,
      "completions/mean_length": 780.7176513671875,
      "completions/mean_terminated_length": 512.775634765625,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 12.578425655976677,
      "grad_norm": 0.27910077571868896,
      "learning_rate": 1e-06,
      "loss": -0.0544,
      "num_tokens": 735437726.0,
      "reward": 0.7042410969734192,
      "reward_std": 0.12561674416065216,
      "rewards/verify_math_reward/mean": 0.7042410969734192,
      "rewards/verify_math_reward/std": 0.45663803815841675,
      "step": 1346
    },
    {
      "clip_ratio/high_max": 0.0017548783544043545,
      "clip_ratio/high_mean": 0.0005606087624983047,
      "clip_ratio/low_mean": 0.00019346550914178806,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007540742844867054,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3733.0,
      "completions/mean_length": 948.0379638671875,
      "completions/mean_terminated_length": 498.3290710449219,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 12.587755102040816,
      "grad_norm": 0.6453022360801697,
      "learning_rate": 1e-06,
      "loss": -0.0632,
      "num_tokens": 735918744.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.11622144281864166,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 1347
    },
    {
      "clip_ratio/high_max": 0.0020438689753063954,
      "clip_ratio/high_mean": 0.000658259799820371,
      "clip_ratio/low_mean": 0.0003055958482036658,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009638556457503,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3782.0,
      "completions/mean_length": 1000.98779296875,
      "completions/mean_terminated_length": 517.7664794921875,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 12.597084548104956,
      "grad_norm": 0.3222213685512543,
      "learning_rate": 1e-06,
      "loss": -0.0732,
      "num_tokens": 736407973.0,
      "reward": 0.676339328289032,
      "reward_std": 0.13722378015518188,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335985660553,
      "step": 1348
    },
    {
      "clip_ratio/high_max": 0.0024825293221510947,
      "clip_ratio/high_mean": 0.0007444149105140241,
      "clip_ratio/low_mean": 0.0002747075670868071,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010191224682785105,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2491.0,
      "completions/mean_length": 1003.98779296875,
      "completions/mean_terminated_length": 530.436279296875,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 12.606413994169095,
      "grad_norm": 0.24971255660057068,
      "learning_rate": 1e-06,
      "loss": -0.0567,
      "num_tokens": 736918250.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.12628935277462006,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 1349
    },
    {
      "clip_ratio/high_max": 0.0024342500692000613,
      "clip_ratio/high_mean": 0.0009041311186592793,
      "clip_ratio/low_mean": 0.0003971068072132766,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013012379131396301,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3984.0,
      "completions/mean_length": 888.65185546875,
      "completions/mean_terminated_length": 485.7185974121094,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 12.615743440233237,
      "grad_norm": 0.2990378439426422,
      "learning_rate": 1e-06,
      "loss": -0.0409,
      "num_tokens": 737398250.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.16299711167812347,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 1350
    },
    {
      "clip_ratio/high_max": 0.0023502210242440924,
      "clip_ratio/high_mean": 0.0007898391268099658,
      "clip_ratio/low_mean": 0.0006947171011688624,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014845562145637814,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2742.0,
      "completions/mean_length": 920.7254638671875,
      "completions/mean_terminated_length": 530.7794189453125,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 12.625072886297376,
      "grad_norm": 16.867530822753906,
      "learning_rate": 1e-06,
      "loss": -0.0679,
      "num_tokens": 737911308.0,
      "reward": 0.640625,
      "reward_std": 0.15643151104450226,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 1351
    },
    {
      "clip_ratio/high_max": 0.002359927231736947,
      "clip_ratio/high_mean": 0.0006368118611135287,
      "clip_ratio/low_mean": 0.0004894386020168895,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011262504376645666,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2875.0,
      "completions/mean_length": 925.33935546875,
      "completions/mean_terminated_length": 531.4931030273438,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 12.634402332361516,
      "grad_norm": 0.32085034251213074,
      "learning_rate": 1e-06,
      "loss": -0.0268,
      "num_tokens": 738422820.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.11103636026382446,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 1352
    },
    {
      "clip_ratio/high_max": 0.0020135758277319837,
      "clip_ratio/high_mean": 0.00059482392953214,
      "clip_ratio/low_mean": 0.0003226257676942623,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009174496535706567,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3710.0,
      "completions/mean_length": 984.9342041015625,
      "completions/mean_terminated_length": 513.0758056640625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 12.643731778425655,
      "grad_norm": 0.5963546633720398,
      "learning_rate": 1e-06,
      "loss": -0.0277,
      "num_tokens": 738916025.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.1061122789978981,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 1353
    },
    {
      "clip_ratio/high_max": 0.0018552585133875255,
      "clip_ratio/high_mean": 0.0006031259708834114,
      "clip_ratio/low_mean": 0.000258984800439066,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008621107517683413,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1985.0,
      "completions/mean_length": 800.7310791015625,
      "completions/mean_terminated_length": 450.86297607421875,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 12.653061224489797,
      "grad_norm": 0.25760844349861145,
      "learning_rate": 1e-06,
      "loss": -0.0427,
      "num_tokens": 739358560.0,
      "reward": 0.7165178656578064,
      "reward_std": 0.10968086868524551,
      "rewards/verify_math_reward/mean": 0.7165178656578064,
      "rewards/verify_math_reward/std": 0.4509401023387909,
      "step": 1354
    },
    {
      "clip_ratio/high_max": 0.0018420838496240322,
      "clip_ratio/high_mean": 0.0006966865385038545,
      "clip_ratio/low_mean": 0.0004199227610115486,
      "clip_ratio/low_min": 1.6141528249136172e-05,
      "clip_ratio/region_mean": 0.0011166092899657087,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2475.0,
      "completions/mean_length": 866.216552734375,
      "completions/mean_terminated_length": 545.2196044921875,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 12.662390670553936,
      "grad_norm": 0.40403029322624207,
      "learning_rate": 1e-06,
      "loss": -0.0393,
      "num_tokens": 739896074.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.15462279319763184,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 1355
    },
    {
      "clip_ratio/high_max": 0.001981287838134449,
      "clip_ratio/high_mean": 0.0006317215529634268,
      "clip_ratio/low_mean": 0.00037931783617750625,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001011039403238101,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2812.0,
      "completions/mean_length": 1033.76123046875,
      "completions/mean_terminated_length": 541.89892578125,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 12.671720116618076,
      "grad_norm": 0.2093334197998047,
      "learning_rate": 1e-06,
      "loss": -0.0726,
      "num_tokens": 740416092.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.1397060751914978,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 1356
    },
    {
      "clip_ratio/high_max": 0.0019730155836441554,
      "clip_ratio/high_mean": 0.0005656256280417438,
      "clip_ratio/low_mean": 0.00043394920339778764,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000999574829620542,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3459.0,
      "completions/mean_length": 1034.0648193359375,
      "completions/mean_terminated_length": 560.569580078125,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 12.681049562682215,
      "grad_norm": 0.24663449823856354,
      "learning_rate": 1e-06,
      "loss": -0.033,
      "num_tokens": 740937878.0,
      "reward": 0.621651828289032,
      "reward_std": 0.14545360207557678,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 1357
    },
    {
      "clip_ratio/high_max": 0.002267629883135669,
      "clip_ratio/high_mean": 0.0007278704006239423,
      "clip_ratio/low_mean": 0.0002739739552453102,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010018443445005687,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2830.0,
      "completions/mean_length": 850.068115234375,
      "completions/mean_terminated_length": 483.1366271972656,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 12.690379008746355,
      "grad_norm": 0.22954854369163513,
      "learning_rate": 1e-06,
      "loss": -0.0584,
      "num_tokens": 741413035.0,
      "reward": 0.7243303656578064,
      "reward_std": 0.12546339631080627,
      "rewards/verify_math_reward/mean": 0.7243303656578064,
      "rewards/verify_math_reward/std": 0.4471006691455841,
      "step": 1358
    },
    {
      "clip_ratio/high_max": 0.0023479087903979234,
      "clip_ratio/high_mean": 0.0006960446326047531,
      "clip_ratio/low_mean": 0.0004340857358329231,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001130130356614245,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3638.0,
      "completions/mean_length": 889.75341796875,
      "completions/mean_terminated_length": 518.419677734375,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 12.699708454810496,
      "grad_norm": 0.2435080111026764,
      "learning_rate": 1e-06,
      "loss": -0.051,
      "num_tokens": 741919382.0,
      "reward": 0.7209821939468384,
      "reward_std": 0.11633063107728958,
      "rewards/verify_math_reward/mean": 0.7209821343421936,
      "rewards/verify_math_reward/std": 0.448766827583313,
      "step": 1359
    },
    {
      "clip_ratio/high_max": 0.0017577246399014257,
      "clip_ratio/high_mean": 0.0007076831798258354,
      "clip_ratio/low_mean": 0.0003529586056174594,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001060641781805316,
      "completions/clipped_ratio": 0.1484375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1940.0,
      "completions/mean_length": 1041.13623046875,
      "completions/mean_terminated_length": 508.636962890625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 12.709037900874636,
      "grad_norm": 0.6882129907608032,
      "learning_rate": 1e-06,
      "loss": -0.0663,
      "num_tokens": 742390752.0,
      "reward": 0.6328125,
      "reward_std": 0.16250787675380707,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 1360
    },
    {
      "clip_ratio/high_max": 0.0015584094544465188,
      "clip_ratio/high_mean": 0.0005284538101477665,
      "clip_ratio/low_mean": 0.00036905520255459123,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008975089986051898,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2852.0,
      "completions/mean_length": 801.7031860351562,
      "completions/mean_terminated_length": 491.98291015625,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 12.718367346938775,
      "grad_norm": 0.28293901681900024,
      "learning_rate": 1e-06,
      "loss": -0.045,
      "num_tokens": 742874982.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.12644091248512268,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 1361
    },
    {
      "clip_ratio/high_max": 0.0017015195444400888,
      "clip_ratio/high_mean": 0.00047585794163751416,
      "clip_ratio/low_mean": 0.000347914735584709,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008237726760853548,
      "completions/clipped_ratio": 0.15625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3659.0,
      "completions/mean_length": 1136.7254638671875,
      "completions/mean_terminated_length": 588.7116088867188,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 12.727696793002915,
      "grad_norm": 0.20893487334251404,
      "learning_rate": 1e-06,
      "loss": -0.0419,
      "num_tokens": 743411312.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.10618643462657928,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 1362
    },
    {
      "clip_ratio/high_max": 0.0019468862883513793,
      "clip_ratio/high_mean": 0.0006948173395358026,
      "clip_ratio/low_mean": 0.0003842596770482487,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00107907703568344,
      "completions/clipped_ratio": 0.1462053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3201.0,
      "completions/mean_length": 1069.0826416015625,
      "completions/mean_terminated_length": 550.7477416992188,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 12.737026239067056,
      "grad_norm": 0.2141713947057724,
      "learning_rate": 1e-06,
      "loss": -0.0741,
      "num_tokens": 743925954.0,
      "reward": 0.574776828289032,
      "reward_std": 0.14777731895446777,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 1363
    },
    {
      "clip_ratio/high_max": 0.003098163040704094,
      "clip_ratio/high_mean": 0.000917768931685714,
      "clip_ratio/low_mean": 0.00045799038343830034,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013757593078480568,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3554.0,
      "completions/mean_length": 963.3136596679688,
      "completions/mean_terminated_length": 551.9507446289062,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 12.746355685131196,
      "grad_norm": 0.25729474425315857,
      "learning_rate": 1e-06,
      "loss": -0.0563,
      "num_tokens": 744464371.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.14466404914855957,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 1364
    },
    {
      "clip_ratio/high_max": 0.0019777921406785026,
      "clip_ratio/high_mean": 0.0006717506166751264,
      "clip_ratio/low_mean": 0.0004016095765564387,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010733601920946967,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2520.0,
      "completions/mean_length": 806.6495971679688,
      "completions/mean_terminated_length": 497.3944091796875,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 12.755685131195335,
      "grad_norm": 0.26544907689094543,
      "learning_rate": 1e-06,
      "loss": -0.0433,
      "num_tokens": 744951817.0,
      "reward": 0.7008928656578064,
      "reward_std": 0.1327543705701828,
      "rewards/verify_math_reward/mean": 0.7008928656578064,
      "rewards/verify_math_reward/std": 0.458122581243515,
      "step": 1365
    },
    {
      "clip_ratio/high_max": 0.0015571737858408596,
      "clip_ratio/high_mean": 0.0005872535057278583,
      "clip_ratio/low_mean": 0.0003519047841109568,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009391582843818469,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3847.0,
      "completions/mean_length": 1048.9754638671875,
      "completions/mean_terminated_length": 582.3140258789062,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 12.765014577259475,
      "grad_norm": 0.3987433910369873,
      "learning_rate": 1e-06,
      "loss": -0.044,
      "num_tokens": 745494731.0,
      "reward": 0.6171875,
      "reward_std": 0.14451251924037933,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 1366
    },
    {
      "clip_ratio/high_max": 0.0017320850420219358,
      "clip_ratio/high_mean": 0.0005912239066674374,
      "clip_ratio/low_mean": 0.00039086757260520244,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009820914929150604,
      "completions/clipped_ratio": 0.1450892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3386.0,
      "completions/mean_length": 1037.1551513671875,
      "completions/mean_terminated_length": 518.030029296875,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 12.774344023323614,
      "grad_norm": 99.93839263916016,
      "learning_rate": 1e-06,
      "loss": -0.053,
      "num_tokens": 745977630.0,
      "reward": 0.6328125,
      "reward_std": 0.1306820958852768,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 1367
    },
    {
      "clip_ratio/high_max": 0.001804772560717538,
      "clip_ratio/high_mean": 0.0006167724113765871,
      "clip_ratio/low_mean": 0.00026702754712459864,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008837999321258394,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4081.0,
      "completions/mean_length": 972.40185546875,
      "completions/mean_terminated_length": 484.7174377441406,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 12.783673469387756,
      "grad_norm": 0.3347950875759125,
      "learning_rate": 1e-06,
      "loss": -0.0419,
      "num_tokens": 746439430.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.11866390705108643,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 1368
    },
    {
      "clip_ratio/high_max": 0.002638830730575137,
      "clip_ratio/high_mean": 0.0008918509229260962,
      "clip_ratio/low_mean": 0.0003563135458080069,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012481644735089503,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3441.0,
      "completions/mean_length": 964.9297485351562,
      "completions/mean_terminated_length": 558.2459106445312,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 12.793002915451895,
      "grad_norm": 0.3066607415676117,
      "learning_rate": 1e-06,
      "loss": -0.0545,
      "num_tokens": 746983927.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.15559779107570648,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1369
    },
    {
      "clip_ratio/high_max": 0.0014095676415308844,
      "clip_ratio/high_mean": 0.00046644072790513746,
      "clip_ratio/low_mean": 0.000292919801267999,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007593605332658626,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3962.0,
      "completions/mean_length": 906.2188110351562,
      "completions/mean_terminated_length": 558.8168334960938,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 12.802332361516035,
      "grad_norm": 0.21188370883464813,
      "learning_rate": 1e-06,
      "loss": -0.0428,
      "num_tokens": 747526211.0,
      "reward": 0.7165178656578064,
      "reward_std": 0.11745856702327728,
      "rewards/verify_math_reward/mean": 0.7165178656578064,
      "rewards/verify_math_reward/std": 0.4509401023387909,
      "step": 1370
    },
    {
      "clip_ratio/high_max": 0.0019549962380551733,
      "clip_ratio/high_mean": 0.0007605743721796898,
      "clip_ratio/low_mean": 0.0003989964870925178,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011595708892855328,
      "completions/clipped_ratio": 0.1529017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4065.0,
      "completions/mean_length": 1083.130615234375,
      "completions/mean_terminated_length": 539.3056640625,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 12.811661807580174,
      "grad_norm": 0.24704080820083618,
      "learning_rate": 1e-06,
      "loss": -0.0632,
      "num_tokens": 748038552.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.15064233541488647,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 1371
    },
    {
      "clip_ratio/high_max": 0.0019636064862424973,
      "clip_ratio/high_mean": 0.0007228554368339246,
      "clip_ratio/low_mean": 0.0004627469643310178,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011856024138978682,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2826.0,
      "completions/mean_length": 1033.9085693359375,
      "completions/mean_terminated_length": 560.38916015625,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 12.820991253644316,
      "grad_norm": 0.2643205225467682,
      "learning_rate": 1e-06,
      "loss": -0.0554,
      "num_tokens": 748566814.0,
      "reward": 0.613839328289032,
      "reward_std": 0.1306481957435608,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 1372
    },
    {
      "clip_ratio/high_max": 0.0022867963889439125,
      "clip_ratio/high_mean": 0.0008140832942444831,
      "clip_ratio/low_mean": 0.000571572565149836,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001385655839840183,
      "completions/clipped_ratio": 0.1395089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2339.0,
      "completions/mean_length": 1041.462158203125,
      "completions/mean_terminated_length": 546.2386474609375,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 12.830320699708455,
      "grad_norm": 0.8475359678268433,
      "learning_rate": 1e-06,
      "loss": -0.0317,
      "num_tokens": 749084676.0,
      "reward": 0.640625,
      "reward_std": 0.1382429599761963,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 1373
    },
    {
      "clip_ratio/high_max": 0.002273448451887816,
      "clip_ratio/high_mean": 0.0008107989287964301,
      "clip_ratio/low_mean": 0.0003847020611829066,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011955010231758934,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4033.0,
      "completions/mean_length": 929.5770263671875,
      "completions/mean_terminated_length": 504.7151794433594,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 12.839650145772595,
      "grad_norm": 0.24407708644866943,
      "learning_rate": 1e-06,
      "loss": -0.1056,
      "num_tokens": 749570417.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.14451363682746887,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 1374
    },
    {
      "clip_ratio/high_max": 0.002060843755316455,
      "clip_ratio/high_mean": 0.0007619017005708884,
      "clip_ratio/low_mean": 0.0004417857421685767,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001203687432280276,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4055.0,
      "completions/mean_length": 935.341552734375,
      "completions/mean_terminated_length": 502.1548156738281,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 12.848979591836734,
      "grad_norm": 0.29424676299095154,
      "learning_rate": 1e-06,
      "loss": -0.0413,
      "num_tokens": 750058099.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.1353893280029297,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 1375
    },
    {
      "clip_ratio/high_max": 0.00217004971273127,
      "clip_ratio/high_mean": 0.0008153443122864701,
      "clip_ratio/low_mean": 0.0004750150524159835,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012903593487862963,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2332.0,
      "completions/mean_length": 999.8449096679688,
      "completions/mean_terminated_length": 557.5369873046875,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 12.858309037900874,
      "grad_norm": 1.1741997003555298,
      "learning_rate": 1e-06,
      "loss": -0.0702,
      "num_tokens": 750583248.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.16085955500602722,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 1376
    },
    {
      "clip_ratio/high_max": 0.0015669029671698809,
      "clip_ratio/high_mean": 0.0006371952913468704,
      "clip_ratio/low_mean": 0.0003702701142174192,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010074654128402472,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3462.0,
      "completions/mean_length": 947.0256958007812,
      "completions/mean_terminated_length": 586.6952514648438,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 12.867638483965015,
      "grad_norm": 0.7590116858482361,
      "learning_rate": 1e-06,
      "loss": -0.0386,
      "num_tokens": 751139935.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.1619868129491806,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 1377
    },
    {
      "clip_ratio/high_max": 0.0018466759247530717,
      "clip_ratio/high_mean": 0.0005880852513655555,
      "clip_ratio/low_mean": 0.0005118016952110338,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001099886947486084,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4036.0,
      "completions/mean_length": 1103.4576416015625,
      "completions/mean_terminated_length": 622.7901611328125,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 12.876967930029155,
      "grad_norm": 0.21544533967971802,
      "learning_rate": 1e-06,
      "loss": -0.055,
      "num_tokens": 751723785.0,
      "reward": 0.559151828289032,
      "reward_std": 0.13016286492347717,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 1378
    },
    {
      "clip_ratio/high_max": 0.0013301987582963193,
      "clip_ratio/high_mean": 0.0005247186995802622,
      "clip_ratio/low_mean": 0.0003667620244414138,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008914807040127926,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3829.0,
      "completions/mean_length": 812.9944458007812,
      "completions/mean_terminated_length": 547.6610717773438,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 12.886297376093294,
      "grad_norm": 0.25212574005126953,
      "learning_rate": 1e-06,
      "loss": -0.0311,
      "num_tokens": 752274788.0,
      "reward": 0.6640625,
      "reward_std": 0.1315172165632248,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 1379
    },
    {
      "clip_ratio/high_max": 0.002105562853103038,
      "clip_ratio/high_mean": 0.0008208707604353549,
      "clip_ratio/low_mean": 0.0003263071916990157,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011471779544081073,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3309.0,
      "completions/mean_length": 946.364990234375,
      "completions/mean_terminated_length": 559.5676879882812,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 12.895626822157434,
      "grad_norm": 0.24270500242710114,
      "learning_rate": 1e-06,
      "loss": -0.0548,
      "num_tokens": 752807811.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.14282114803791046,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 1380
    },
    {
      "clip_ratio/high_max": 0.0019212423212593421,
      "clip_ratio/high_mean": 0.0007278142074937932,
      "clip_ratio/low_mean": 0.0003460776024439838,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010738918390416075,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3627.0,
      "completions/mean_length": 1041.673095703125,
      "completions/mean_terminated_length": 564.8038940429688,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 12.904956268221575,
      "grad_norm": 0.28183022141456604,
      "learning_rate": 1e-06,
      "loss": -0.0621,
      "num_tokens": 753343046.0,
      "reward": 0.6328125,
      "reward_std": 0.14609551429748535,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 1381
    },
    {
      "clip_ratio/high_max": 0.0017274372112296987,
      "clip_ratio/high_mean": 0.0005665796907123877,
      "clip_ratio/low_mean": 0.0003050948271265952,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008716745269339299,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2056.0,
      "completions/mean_length": 832.8092041015625,
      "completions/mean_terminated_length": 530.3670654296875,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 12.914285714285715,
      "grad_norm": 0.27039772272109985,
      "learning_rate": 1e-06,
      "loss": -0.0429,
      "num_tokens": 753858491.0,
      "reward": 0.7020089626312256,
      "reward_std": 0.10562372952699661,
      "rewards/verify_math_reward/mean": 0.7020089030265808,
      "rewards/verify_math_reward/std": 0.45763099193573,
      "step": 1382
    },
    {
      "clip_ratio/high_max": 0.0020260836972738616,
      "clip_ratio/high_mean": 0.0008750749730097596,
      "clip_ratio/low_mean": 0.00044082958811486606,
      "clip_ratio/low_min": 1.582679215061944e-05,
      "clip_ratio/region_mean": 0.0013159045302018058,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3756.0,
      "completions/mean_length": 869.013427734375,
      "completions/mean_terminated_length": 517.5593872070312,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 12.923615160349854,
      "grad_norm": 0.5587045550346375,
      "learning_rate": 1e-06,
      "loss": -0.0604,
      "num_tokens": 754374567.0,
      "reward": 0.7098214626312256,
      "reward_std": 0.16119515895843506,
      "rewards/verify_math_reward/mean": 0.7098214030265808,
      "rewards/verify_math_reward/std": 0.454098105430603,
      "step": 1383
    },
    {
      "clip_ratio/high_max": 0.0018916974077001214,
      "clip_ratio/high_mean": 0.0007225484514492564,
      "clip_ratio/low_mean": 0.00046064554999247775,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001183193988254061,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3774.0,
      "completions/mean_length": 997.6082763671875,
      "completions/mean_terminated_length": 550.45849609375,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 12.932944606413994,
      "grad_norm": 1.0377944707870483,
      "learning_rate": 1e-06,
      "loss": -0.0768,
      "num_tokens": 754898000.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.14846131205558777,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 1384
    },
    {
      "clip_ratio/high_max": 0.0018531905989220832,
      "clip_ratio/high_mean": 0.0006839078305347357,
      "clip_ratio/low_mean": 0.00046031783040234586,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011442256582085975,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3490.0,
      "completions/mean_length": 1012.8058471679688,
      "completions/mean_terminated_length": 581.3155517578125,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 12.942274052478133,
      "grad_norm": 0.3049590587615967,
      "learning_rate": 1e-06,
      "loss": -0.0586,
      "num_tokens": 755444122.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.15999305248260498,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 1385
    },
    {
      "clip_ratio/high_max": 0.0019815594714600593,
      "clip_ratio/high_mean": 0.0007506612710130867,
      "clip_ratio/low_mean": 0.0005363260279409587,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001286987288040109,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2481.0,
      "completions/mean_length": 996.0480346679688,
      "completions/mean_terminated_length": 562.2124633789062,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 12.951603498542275,
      "grad_norm": 0.29062795639038086,
      "learning_rate": 1e-06,
      "loss": -0.053,
      "num_tokens": 755972237.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.17149202525615692,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644899368286,
      "step": 1386
    },
    {
      "clip_ratio/high_max": 0.0021974281225993764,
      "clip_ratio/high_mean": 0.0007501259115088033,
      "clip_ratio/low_mean": 0.0005105942154841614,
      "clip_ratio/low_min": 2.6047093342640437e-05,
      "clip_ratio/region_mean": 0.001260720135178417,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1612.0,
      "completions/mean_length": 890.4129638671875,
      "completions/mean_terminated_length": 487.70098876953125,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 12.960932944606414,
      "grad_norm": 0.2648910880088806,
      "learning_rate": 1e-06,
      "loss": -0.0517,
      "num_tokens": 756457031.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.14507775008678436,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 1387
    },
    {
      "clip_ratio/high_max": 0.0019278808067610953,
      "clip_ratio/high_mean": 0.0006655543675151421,
      "clip_ratio/low_mean": 0.0002514504044484056,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009170047651423374,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2854.0,
      "completions/mean_length": 891.9263916015625,
      "completions/mean_terminated_length": 502.9461975097656,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 12.970262390670554,
      "grad_norm": 0.35309767723083496,
      "learning_rate": 1e-06,
      "loss": -0.0327,
      "num_tokens": 756946101.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.10867238789796829,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 1388
    },
    {
      "clip_ratio/high_max": 0.0013791169112664647,
      "clip_ratio/high_mean": 0.00040248090772365686,
      "clip_ratio/low_mean": 0.00025305602434855246,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006555369454872562,
      "completions/clipped_ratio": 0.1372767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4047.0,
      "completions/mean_length": 1010.2020263671875,
      "completions/mean_terminated_length": 519.1889038085938,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 12.979591836734693,
      "grad_norm": 0.2338443100452423,
      "learning_rate": 1e-06,
      "loss": -0.0402,
      "num_tokens": 757438650.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.09333452582359314,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 1389
    },
    {
      "clip_ratio/high_max": 0.0017437691567465663,
      "clip_ratio/high_mean": 0.0005722645419155015,
      "clip_ratio/low_mean": 0.00034574814435472945,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009180126762657892,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3830.0,
      "completions/mean_length": 1025.6082763671875,
      "completions/mean_terminated_length": 573.501953125,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 12.988921282798835,
      "grad_norm": 0.22266581654548645,
      "learning_rate": 1e-06,
      "loss": -0.0766,
      "num_tokens": 757975123.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.14158402383327484,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975659370422363,
      "step": 1390
    },
    {
      "clip_ratio/high_max": 0.0015518709733441938,
      "clip_ratio/high_mean": 0.0005661605073328246,
      "clip_ratio/low_mean": 0.0002864747480089136,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008526352467015386,
      "completions/clipped_ratio": 0.07670454545454541,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2614.0,
      "completions/mean_length": 853.1704711914062,
      "completions/mean_terminated_length": 583.7661743164062,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 12.998250728862974,
      "grad_norm": 0.23627407848834991,
      "learning_rate": 1e-06,
      "loss": -0.0371,
      "num_tokens": 758497122.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.11727241426706314,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 1391
    },
    {
      "clip_ratio/high_max": 0.0015648082808183972,
      "clip_ratio/high_mean": 0.0004424165472300956,
      "clip_ratio/low_mean": 0.00036852742709925224,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008109439877443947,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3467.0,
      "completions/mean_length": 931.3951416015625,
      "completions/mean_terminated_length": 538.3011474609375,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 13.00932944606414,
      "grad_norm": 0.20707617700099945,
      "learning_rate": 1e-06,
      "loss": -0.0376,
      "num_tokens": 759014796.0,
      "reward": 0.625,
      "reward_std": 0.11050571501255035,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1392
    },
    {
      "clip_ratio/high_max": 0.001504514177213423,
      "clip_ratio/high_mean": 0.0005346524048945867,
      "clip_ratio/low_mean": 0.0003906382294189825,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009252906565961894,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3606.0,
      "completions/mean_length": 900.5045166015625,
      "completions/mean_terminated_length": 591.5153198242188,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 13.018658892128279,
      "grad_norm": 0.3734883666038513,
      "learning_rate": 1e-06,
      "loss": -0.0196,
      "num_tokens": 759577432.0,
      "reward": 0.7064732313156128,
      "reward_std": 0.13042137026786804,
      "rewards/verify_math_reward/mean": 0.7064732313156128,
      "rewards/verify_math_reward/std": 0.4556320011615753,
      "step": 1393
    },
    {
      "clip_ratio/high_max": 0.0021092421702633146,
      "clip_ratio/high_mean": 0.0007486360846087337,
      "clip_ratio/low_mean": 0.0004456653541637934,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011943014796997886,
      "completions/clipped_ratio": 0.1462053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2815.0,
      "completions/mean_length": 1086.9754638671875,
      "completions/mean_terminated_length": 571.70458984375,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 13.02798833819242,
      "grad_norm": 0.24691501259803772,
      "learning_rate": 1e-06,
      "loss": -0.0442,
      "num_tokens": 760105314.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.15541164577007294,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 1394
    },
    {
      "clip_ratio/high_max": 0.0016860339765116805,
      "clip_ratio/high_mean": 0.000513932033754827,
      "clip_ratio/low_mean": 0.00025348205599584617,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007674140888411785,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2764.0,
      "completions/mean_length": 901.1295166015625,
      "completions/mean_terminated_length": 508.7769470214844,
      "completions/min_length": 90.0,
      "completions/min_terminated_length": 90.0,
      "epoch": 13.03731778425656,
      "grad_norm": 0.23121798038482666,
      "learning_rate": 1e-06,
      "loss": -0.0455,
      "num_tokens": 760598846.0,
      "reward": 0.668526828289032,
      "reward_std": 0.10708937793970108,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056483745575,
      "step": 1395
    },
    {
      "clip_ratio/high_max": 0.001584160105267074,
      "clip_ratio/high_mean": 0.0005649628828905406,
      "clip_ratio/low_mean": 0.0003471491600066656,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000912112052901648,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3119.0,
      "completions/mean_length": 912.9933471679688,
      "completions/mean_terminated_length": 531.032470703125,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 13.0466472303207,
      "grad_norm": 0.2560865581035614,
      "learning_rate": 1e-06,
      "loss": -0.0433,
      "num_tokens": 761115712.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.13215592503547668,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 1396
    },
    {
      "clip_ratio/high_max": 0.0018003426266659517,
      "clip_ratio/high_mean": 0.0005776014768343884,
      "clip_ratio/low_mean": 0.00042263580576218374,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010002372819144512,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2059.0,
      "completions/mean_length": 826.4096069335938,
      "completions/mean_terminated_length": 510.2558288574219,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 13.055976676384839,
      "grad_norm": 0.23720556497573853,
      "learning_rate": 1e-06,
      "loss": -0.0284,
      "num_tokens": 761625543.0,
      "reward": 0.6986607313156128,
      "reward_std": 0.11468091607093811,
      "rewards/verify_math_reward/mean": 0.6986607313156128,
      "rewards/verify_math_reward/std": 0.4590960443019867,
      "step": 1397
    },
    {
      "clip_ratio/high_max": 0.0019789553443843033,
      "clip_ratio/high_mean": 0.0007179691847341019,
      "clip_ratio/low_mean": 0.00040239865552393894,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011203678404854145,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3735.0,
      "completions/mean_length": 849.7957763671875,
      "completions/mean_terminated_length": 527.1668701171875,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 13.06530612244898,
      "grad_norm": 0.2802946865558624,
      "learning_rate": 1e-06,
      "loss": -0.021,
      "num_tokens": 762133072.0,
      "reward": 0.7332589626312256,
      "reward_std": 0.1350441575050354,
      "rewards/verify_math_reward/mean": 0.7332589030265808,
      "rewards/verify_math_reward/std": 0.4425029158592224,
      "step": 1398
    },
    {
      "clip_ratio/high_max": 0.0019997288036393,
      "clip_ratio/high_mean": 0.000648982097118278,
      "clip_ratio/low_mean": 0.00043257709376121056,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001081559184967773,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3993.0,
      "completions/mean_length": 900.3817138671875,
      "completions/mean_terminated_length": 547.9529418945312,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 13.07463556851312,
      "grad_norm": 0.24073903262615204,
      "learning_rate": 1e-06,
      "loss": -0.0426,
      "num_tokens": 762663694.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.13624738156795502,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 1399
    },
    {
      "clip_ratio/high_max": 0.002562109522841638,
      "clip_ratio/high_mean": 0.0007564049938082462,
      "clip_ratio/low_mean": 0.00029234775456643547,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010487527415534714,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3513.0,
      "completions/mean_length": 989.154052734375,
      "completions/mean_terminated_length": 549.842041015625,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 13.08396501457726,
      "grad_norm": 0.4724392592906952,
      "learning_rate": 1e-06,
      "loss": -0.0522,
      "num_tokens": 763192256.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.11617907881736755,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 1400
    },
    {
      "clip_ratio/high_max": 0.0011260485043749213,
      "clip_ratio/high_mean": 0.00043029518383264076,
      "clip_ratio/low_mean": 0.00026256952924086363,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006928647289896617,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3987.0,
      "completions/mean_length": 935.27685546875,
      "completions/mean_terminated_length": 542.6649780273438,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 13.093294460641399,
      "grad_norm": 0.20758171379566193,
      "learning_rate": 1e-06,
      "loss": -0.041,
      "num_tokens": 763706592.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.10987518727779388,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.46642565727233887,
      "step": 1401
    },
    {
      "clip_ratio/high_max": 0.0015355567702499684,
      "clip_ratio/high_mean": 0.0005331959373506834,
      "clip_ratio/low_mean": 0.0002480945679508295,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007812905132595915,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3594.0,
      "completions/mean_length": 866.8370971679688,
      "completions/mean_terminated_length": 545.90185546875,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 13.102623906705539,
      "grad_norm": 0.2250635325908661,
      "learning_rate": 1e-06,
      "loss": -0.0561,
      "num_tokens": 764243726.0,
      "reward": 0.7109375596046448,
      "reward_std": 0.1287727802991867,
      "rewards/verify_math_reward/mean": 0.7109375,
      "rewards/verify_math_reward/std": 0.45358020067214966,
      "step": 1402
    },
    {
      "clip_ratio/high_max": 0.0014361132161866408,
      "clip_ratio/high_mean": 0.0005224953333708982,
      "clip_ratio/low_mean": 0.00032753702953414177,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008500323574480717,
      "completions/clipped_ratio": 0.1428571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4012.0,
      "completions/mean_length": 1042.0513916015625,
      "completions/mean_terminated_length": 533.0599365234375,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 13.11195335276968,
      "grad_norm": 0.20067481696605682,
      "learning_rate": 1e-06,
      "loss": -0.0431,
      "num_tokens": 764748476.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.10919302701950073,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 1403
    },
    {
      "clip_ratio/high_max": 0.0017143203658633865,
      "clip_ratio/high_mean": 0.0006862149748485535,
      "clip_ratio/low_mean": 0.0004324463725424721,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011186613301106263,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3250.0,
      "completions/mean_length": 987.5670166015625,
      "completions/mean_terminated_length": 538.9680786132812,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 13.12128279883382,
      "grad_norm": 0.250449538230896,
      "learning_rate": 1e-06,
      "loss": -0.0492,
      "num_tokens": 765262008.0,
      "reward": 0.6484375,
      "reward_std": 0.14350220561027527,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 1404
    },
    {
      "clip_ratio/high_max": 0.00151262997314916,
      "clip_ratio/high_mean": 0.0005685164214810356,
      "clip_ratio/low_mean": 0.0004395626838231692,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010080791180371307,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3900.0,
      "completions/mean_length": 1107.204345703125,
      "completions/mean_terminated_length": 613.60595703125,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 13.130612244897959,
      "grad_norm": 0.3064783811569214,
      "learning_rate": 1e-06,
      "loss": -0.044,
      "num_tokens": 765829791.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.13339374959468842,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 1405
    },
    {
      "clip_ratio/high_max": 0.002421735345706111,
      "clip_ratio/high_mean": 0.0008009010516616399,
      "clip_ratio/low_mean": 0.00047693476335552987,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012778357995557599,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3001.0,
      "completions/mean_length": 982.33935546875,
      "completions/mean_terminated_length": 560.0811157226562,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 13.139941690962099,
      "grad_norm": 0.275840699672699,
      "learning_rate": 1e-06,
      "loss": -0.0446,
      "num_tokens": 766365839.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.14440374076366425,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 1406
    },
    {
      "clip_ratio/high_max": 0.0023350001429207623,
      "clip_ratio/high_mean": 0.0007682941577513702,
      "clip_ratio/low_mean": 0.0004151979937887518,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011834921679110266,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3665.0,
      "completions/mean_length": 827.6417846679688,
      "completions/mean_terminated_length": 529.0706787109375,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 13.14927113702624,
      "grad_norm": 0.24996188282966614,
      "learning_rate": 1e-06,
      "loss": -0.0533,
      "num_tokens": 766881086.0,
      "reward": 0.7254464626312256,
      "reward_std": 0.1504889577627182,
      "rewards/verify_math_reward/mean": 0.7254464030265808,
      "rewards/verify_math_reward/std": 0.4465382993221283,
      "step": 1407
    },
    {
      "clip_ratio/high_max": 0.0017211684680660255,
      "clip_ratio/high_mean": 0.0005624990271826391,
      "clip_ratio/low_mean": 0.0002965925878015696,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008590916058892617,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2450.0,
      "completions/mean_length": 923.786865234375,
      "completions/mean_terminated_length": 560.7972412109375,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 13.15860058309038,
      "grad_norm": 0.6004595756530762,
      "learning_rate": 1e-06,
      "loss": -0.0213,
      "num_tokens": 767425871.0,
      "reward": 0.645089328289032,
      "reward_std": 0.11963889002799988,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 1408
    },
    {
      "clip_ratio/high_max": 0.001921835766552249,
      "clip_ratio/high_mean": 0.0005753110599471256,
      "clip_ratio/low_mean": 0.0003687656890178914,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009440767371415859,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2979.0,
      "completions/mean_length": 973.693115234375,
      "completions/mean_terminated_length": 536.72900390625,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 13.167930029154519,
      "grad_norm": 9.168828964233398,
      "learning_rate": 1e-06,
      "loss": -0.0427,
      "num_tokens": 767954332.0,
      "reward": 0.590401828289032,
      "reward_std": 0.12651577591896057,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 1409
    },
    {
      "clip_ratio/high_max": 0.0019108351189061068,
      "clip_ratio/high_mean": 0.0007167610619944753,
      "clip_ratio/low_mean": 0.00038995804061414674,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011067190862377174,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2387.0,
      "completions/mean_length": 803.6808471679688,
      "completions/mean_terminated_length": 516.00244140625,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 13.177259475218658,
      "grad_norm": 0.26358672976493835,
      "learning_rate": 1e-06,
      "loss": -0.0429,
      "num_tokens": 768473718.0,
      "reward": 0.7667410969734192,
      "reward_std": 0.12482510507106781,
      "rewards/verify_math_reward/mean": 0.7667410969734192,
      "rewards/verify_math_reward/std": 0.42314186692237854,
      "step": 1410
    },
    {
      "clip_ratio/high_max": 0.0019463468961475883,
      "clip_ratio/high_mean": 0.0006621730399274384,
      "clip_ratio/low_mean": 0.00044918252797288005,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011113555610791082,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2986.0,
      "completions/mean_length": 893.7109985351562,
      "completions/mean_terminated_length": 531.7130126953125,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 13.186588921282798,
      "grad_norm": 0.2778402864933014,
      "learning_rate": 1e-06,
      "loss": -0.0847,
      "num_tokens": 768986131.0,
      "reward": 0.7254464626312256,
      "reward_std": 0.15094542503356934,
      "rewards/verify_math_reward/mean": 0.7254464030265808,
      "rewards/verify_math_reward/std": 0.4465382993221283,
      "step": 1411
    },
    {
      "clip_ratio/high_max": 0.0022086741228122264,
      "clip_ratio/high_mean": 0.00074517638040561,
      "clip_ratio/low_mean": 0.0005234462605585577,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012686226509686094,
      "completions/clipped_ratio": 0.1450892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3485.0,
      "completions/mean_length": 1080.279052734375,
      "completions/mean_terminated_length": 568.4725952148438,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 13.19591836734694,
      "grad_norm": 0.2508910000324249,
      "learning_rate": 1e-06,
      "loss": -0.0595,
      "num_tokens": 769516429.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.15988317131996155,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 1412
    },
    {
      "clip_ratio/high_max": 0.0021129219530848786,
      "clip_ratio/high_mean": 0.0008120713246171363,
      "clip_ratio/low_mean": 0.00045953432436363073,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012716056262433995,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3032.0,
      "completions/mean_length": 909.0960083007812,
      "completions/mean_terminated_length": 508.73114013671875,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 13.205247813411079,
      "grad_norm": 0.4871099591255188,
      "learning_rate": 1e-06,
      "loss": -0.0572,
      "num_tokens": 770010651.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.14842741191387177,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1413
    },
    {
      "clip_ratio/high_max": 0.0020849489519605413,
      "clip_ratio/high_mean": 0.0008023624959605513,
      "clip_ratio/low_mean": 0.0004160077241976978,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012183701910544187,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1939.0,
      "completions/mean_length": 979.4888916015625,
      "completions/mean_terminated_length": 534.27294921875,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 13.214577259475218,
      "grad_norm": 0.29521510004997253,
      "learning_rate": 1e-06,
      "loss": -0.0411,
      "num_tokens": 770518217.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.15331120789051056,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 1414
    },
    {
      "clip_ratio/high_max": 0.0017796547799662221,
      "clip_ratio/high_mean": 0.0006175994385557715,
      "clip_ratio/low_mean": 0.0004503474581269984,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010679468869057018,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3553.0,
      "completions/mean_length": 942.5335083007812,
      "completions/mean_terminated_length": 559.6971435546875,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 13.223906705539358,
      "grad_norm": 0.338432252407074,
      "learning_rate": 1e-06,
      "loss": -0.0538,
      "num_tokens": 771061911.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.14451110363006592,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 1415
    },
    {
      "clip_ratio/high_max": 0.0017426106714992784,
      "clip_ratio/high_mean": 0.0005744925165345194,
      "clip_ratio/low_mean": 0.0002698533530747227,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008443458555120742,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2392.0,
      "completions/mean_length": 861.1127319335938,
      "completions/mean_terminated_length": 499.8970031738281,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 13.2332361516035,
      "grad_norm": 0.9795376658439636,
      "learning_rate": 1e-06,
      "loss": -0.049,
      "num_tokens": 771560692.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.11314068734645844,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975656390190125,
      "step": 1416
    },
    {
      "clip_ratio/high_max": 0.0021070267575851176,
      "clip_ratio/high_mean": 0.000787390230470919,
      "clip_ratio/low_mean": 0.0003916053956345422,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011789956006396096,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2024.0,
      "completions/mean_length": 901.21435546875,
      "completions/mean_terminated_length": 566.3723754882812,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 13.242565597667639,
      "grad_norm": 0.6604495048522949,
      "learning_rate": 1e-06,
      "loss": -0.0287,
      "num_tokens": 772100260.0,
      "reward": 0.676339328289032,
      "reward_std": 0.13970720767974854,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335985660553,
      "step": 1417
    },
    {
      "clip_ratio/high_max": 0.001792183225916233,
      "clip_ratio/high_mean": 0.0006919041243236279,
      "clip_ratio/low_mean": 0.0004517107954598032,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011436149397923145,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3738.0,
      "completions/mean_length": 930.6551513671875,
      "completions/mean_terminated_length": 568.4514770507812,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 13.251895043731778,
      "grad_norm": 0.2922399044036865,
      "learning_rate": 1e-06,
      "loss": -0.0396,
      "num_tokens": 772650519.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.14564089477062225,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1418
    },
    {
      "clip_ratio/high_max": 0.001605254807145684,
      "clip_ratio/high_mean": 0.0005620021677259501,
      "clip_ratio/low_mean": 0.00038511040793309803,
      "clip_ratio/low_min": 2.6354311557952315e-05,
      "clip_ratio/region_mean": 0.0009471125977142947,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2341.0,
      "completions/mean_length": 880.357177734375,
      "completions/mean_terminated_length": 590.8710327148438,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 13.261224489795918,
      "grad_norm": 0.23736920952796936,
      "learning_rate": 1e-06,
      "loss": -0.0526,
      "num_tokens": 773225039.0,
      "reward": 0.652901828289032,
      "reward_std": 0.14560584723949432,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631317377090454,
      "step": 1419
    },
    {
      "clip_ratio/high_max": 0.0021306603885022923,
      "clip_ratio/high_mean": 0.0007419451176247094,
      "clip_ratio/low_mean": 0.00027688059344654903,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010188257110712584,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2853.0,
      "completions/mean_length": 880.5636596679688,
      "completions/mean_terminated_length": 494.71124267578125,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 13.270553935860057,
      "grad_norm": 0.2683456242084503,
      "learning_rate": 1e-06,
      "loss": -0.0686,
      "num_tokens": 773715688.0,
      "reward": 0.723214328289032,
      "reward_std": 0.12723001837730408,
      "rewards/verify_math_reward/mean": 0.7232142686843872,
      "rewards/verify_math_reward/std": 0.44765952229499817,
      "step": 1420
    },
    {
      "clip_ratio/high_max": 0.0016470260743517429,
      "clip_ratio/high_mean": 0.000479669638480118,
      "clip_ratio/low_mean": 0.00032177978164327214,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00080144939784077,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3451.0,
      "completions/mean_length": 834.5413208007812,
      "completions/mean_terminated_length": 519.173828125,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 13.279883381924199,
      "grad_norm": 0.5516514182090759,
      "learning_rate": 1e-06,
      "loss": -0.0316,
      "num_tokens": 774220701.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.08503435552120209,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.4646684527397156,
      "step": 1421
    },
    {
      "clip_ratio/high_max": 0.0023291828474611975,
      "clip_ratio/high_mean": 0.0007562954651803011,
      "clip_ratio/low_mean": 0.0004190416448182077,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011753371181839611,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3014.0,
      "completions/mean_length": 956.0960083007812,
      "completions/mean_terminated_length": 566.0702514648438,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 13.289212827988338,
      "grad_norm": 0.2889426648616791,
      "learning_rate": 1e-06,
      "loss": -0.0573,
      "num_tokens": 774763931.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.15811371803283691,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 1422
    },
    {
      "clip_ratio/high_max": 0.0013299583024490857,
      "clip_ratio/high_mean": 0.00039771064666638267,
      "clip_ratio/low_mean": 0.00024158328778867144,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006392939449142432,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2034.0,
      "completions/mean_length": 812.0636596679688,
      "completions/mean_terminated_length": 520.7788696289062,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 13.298542274052478,
      "grad_norm": 0.263927161693573,
      "learning_rate": 1e-06,
      "loss": -0.0161,
      "num_tokens": 775281860.0,
      "reward": 0.7098214626312256,
      "reward_std": 0.09055617451667786,
      "rewards/verify_math_reward/mean": 0.7098214030265808,
      "rewards/verify_math_reward/std": 0.454098105430603,
      "step": 1423
    },
    {
      "clip_ratio/high_max": 0.0015924356375762727,
      "clip_ratio/high_mean": 0.0004741024454233411,
      "clip_ratio/low_mean": 0.0002808431995617866,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007549456604465377,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2852.0,
      "completions/mean_length": 905.3761596679688,
      "completions/mean_terminated_length": 531.4127197265625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 13.307871720116617,
      "grad_norm": 0.18549026548862457,
      "learning_rate": 1e-06,
      "loss": -0.0511,
      "num_tokens": 775801477.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.10626451671123505,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 1424
    },
    {
      "clip_ratio/high_max": 0.0018041823350358754,
      "clip_ratio/high_mean": 0.0006176234282975201,
      "clip_ratio/low_mean": 0.00038572605035369634,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001003349490929395,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4017.0,
      "completions/mean_length": 970.0201416015625,
      "completions/mean_terminated_length": 568.44580078125,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 13.317201166180759,
      "grad_norm": 0.30052486062049866,
      "learning_rate": 1e-06,
      "loss": -0.0686,
      "num_tokens": 776333047.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.15950380265712738,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 1425
    },
    {
      "clip_ratio/high_max": 0.0017893044496304356,
      "clip_ratio/high_mean": 0.0005078815574961482,
      "clip_ratio/low_mean": 0.0004161108709013206,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009239924329449423,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3203.0,
      "completions/mean_length": 864.2210083007812,
      "completions/mean_terminated_length": 551.723388671875,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 13.326530612244898,
      "grad_norm": 0.21070371568202972,
      "learning_rate": 1e-06,
      "loss": -0.0319,
      "num_tokens": 776868381.0,
      "reward": 0.652901828289032,
      "reward_std": 0.11655885726213455,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631317377090454,
      "step": 1426
    },
    {
      "clip_ratio/high_max": 0.0019182517717126757,
      "clip_ratio/high_mean": 0.0006882018933538347,
      "clip_ratio/low_mean": 0.00025237160048163787,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009405734635947738,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3832.0,
      "completions/mean_length": 992.2277221679688,
      "completions/mean_terminated_length": 580.2225341796875,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 13.335860058309038,
      "grad_norm": 0.26085469126701355,
      "learning_rate": 1e-06,
      "loss": -0.0508,
      "num_tokens": 777414201.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.12963788211345673,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1427
    },
    {
      "clip_ratio/high_max": 0.0021280049040797167,
      "clip_ratio/high_mean": 0.0006637554033659399,
      "clip_ratio/low_mean": 0.0003439082952354511,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010076637081510853,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3688.0,
      "completions/mean_length": 857.2545166015625,
      "completions/mean_terminated_length": 569.9781494140625,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 13.345189504373177,
      "grad_norm": 0.2858824133872986,
      "learning_rate": 1e-06,
      "loss": -0.0365,
      "num_tokens": 777971573.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.14609484374523163,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1428
    },
    {
      "clip_ratio/high_max": 0.001930807720782468,
      "clip_ratio/high_mean": 0.0007317347431126109,
      "clip_ratio/low_mean": 0.0003103565891251492,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010420913240523078,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4082.0,
      "completions/mean_length": 875.6629638671875,
      "completions/mean_terminated_length": 529.3473510742188,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 13.354518950437317,
      "grad_norm": 0.4686256945133209,
      "learning_rate": 1e-06,
      "loss": -0.0564,
      "num_tokens": 778481751.0,
      "reward": 0.7020089626312256,
      "reward_std": 0.129900723695755,
      "rewards/verify_math_reward/mean": 0.7020089030265808,
      "rewards/verify_math_reward/std": 0.45763099193573,
      "step": 1429
    },
    {
      "clip_ratio/high_max": 0.0016534370079170913,
      "clip_ratio/high_mean": 0.0006498158982140012,
      "clip_ratio/low_mean": 0.0005216361232669442,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011714520296663977,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3980.0,
      "completions/mean_length": 910.4777221679688,
      "completions/mean_terminated_length": 528.2149658203125,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 13.363848396501458,
      "grad_norm": 0.27099400758743286,
      "learning_rate": 1e-06,
      "loss": -0.0518,
      "num_tokens": 778984387.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.14522719383239746,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179922461509705,
      "step": 1430
    },
    {
      "clip_ratio/high_max": 0.0016680276712577324,
      "clip_ratio/high_mean": 0.0005007668742109672,
      "clip_ratio/low_mean": 0.0003237281475776399,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000824495011329418,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2298.0,
      "completions/mean_length": 824.075927734375,
      "completions/mean_terminated_length": 472.2126159667969,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 13.373177842565598,
      "grad_norm": 0.22796332836151123,
      "learning_rate": 1e-06,
      "loss": -0.0433,
      "num_tokens": 779461503.0,
      "reward": 0.6986607313156128,
      "reward_std": 0.11250059306621552,
      "rewards/verify_math_reward/mean": 0.6986607313156128,
      "rewards/verify_math_reward/std": 0.4590960443019867,
      "step": 1431
    },
    {
      "clip_ratio/high_max": 0.001909197340864921,
      "clip_ratio/high_mean": 0.0006651566982327495,
      "clip_ratio/low_mean": 0.0002979744176627719,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009631311258999631,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2665.0,
      "completions/mean_length": 899.9699096679688,
      "completions/mean_terminated_length": 551.8873901367188,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 13.382507288629737,
      "grad_norm": 0.3207937479019165,
      "learning_rate": 1e-06,
      "loss": -0.0344,
      "num_tokens": 779999436.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.13004270195960999,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 1432
    },
    {
      "clip_ratio/high_max": 0.0013709939667023718,
      "clip_ratio/high_mean": 0.0004886877450189786,
      "clip_ratio/low_mean": 0.000465553274807462,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009542410625726916,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1783.0,
      "completions/mean_length": 836.2388916015625,
      "completions/mean_terminated_length": 521.0355224609375,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 13.391836734693877,
      "grad_norm": 0.24078159034252167,
      "learning_rate": 1e-06,
      "loss": -0.0442,
      "num_tokens": 780519594.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.13624556362628937,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.4600566029548645,
      "step": 1433
    },
    {
      "clip_ratio/high_max": 0.0018737911959760822,
      "clip_ratio/high_mean": 0.0007206155978565221,
      "clip_ratio/low_mean": 0.00024201836117754283,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000962633987001027,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3279.0,
      "completions/mean_length": 764.630615234375,
      "completions/mean_terminated_length": 495.388427734375,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 13.401166180758018,
      "grad_norm": 0.33389750123023987,
      "learning_rate": 1e-06,
      "loss": -0.0295,
      "num_tokens": 781022647.0,
      "reward": 0.7209821939468384,
      "reward_std": 0.12906630337238312,
      "rewards/verify_math_reward/mean": 0.7209821343421936,
      "rewards/verify_math_reward/std": 0.448766827583313,
      "step": 1434
    },
    {
      "clip_ratio/high_max": 0.0017861463347799145,
      "clip_ratio/high_mean": 0.0006591451729036635,
      "clip_ratio/low_mean": 0.00026640965461410815,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009255548211513087,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3829.0,
      "completions/mean_length": 910.3895263671875,
      "completions/mean_terminated_length": 528.1162109375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 13.410495626822158,
      "grad_norm": 0.2597056031227112,
      "learning_rate": 1e-06,
      "loss": -0.0691,
      "num_tokens": 781524348.0,
      "reward": 0.7366071939468384,
      "reward_std": 0.13685287535190582,
      "rewards/verify_math_reward/mean": 0.7366071343421936,
      "rewards/verify_math_reward/std": 0.44071969389915466,
      "step": 1435
    },
    {
      "clip_ratio/high_max": 0.0018368865803495282,
      "clip_ratio/high_mean": 0.0007173138292273507,
      "clip_ratio/low_mean": 0.00039299861941799463,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001110312430682825,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3879.0,
      "completions/mean_length": 898.6707763671875,
      "completions/mean_terminated_length": 550.44677734375,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 13.419825072886297,
      "grad_norm": 0.2400037795305252,
      "learning_rate": 1e-06,
      "loss": -0.0517,
      "num_tokens": 782067797.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.14496827125549316,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.47942501306533813,
      "step": 1436
    },
    {
      "clip_ratio/high_max": 0.0019859401436406188,
      "clip_ratio/high_mean": 0.0006794103574065957,
      "clip_ratio/low_mean": 0.00034261595601492445,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001022026333885151,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3113.0,
      "completions/mean_length": 965.23779296875,
      "completions/mean_terminated_length": 576.3475341796875,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 13.429154518950437,
      "grad_norm": 0.18977214395999908,
      "learning_rate": 1e-06,
      "loss": -0.0525,
      "num_tokens": 782613962.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.15405938029289246,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 1437
    },
    {
      "clip_ratio/high_max": 0.0012364787071419414,
      "clip_ratio/high_mean": 0.0004290785373086692,
      "clip_ratio/low_mean": 0.00027034244885726366,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006994209816184593,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3631.0,
      "completions/mean_length": 916.7980346679688,
      "completions/mean_terminated_length": 530.8372802734375,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 13.438483965014576,
      "grad_norm": 1.683428406715393,
      "learning_rate": 1e-06,
      "loss": -0.0624,
      "num_tokens": 783129669.0,
      "reward": 0.676339328289032,
      "reward_std": 0.10479705035686493,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335985660553,
      "step": 1438
    },
    {
      "clip_ratio/high_max": 0.0017593130869499873,
      "clip_ratio/high_mean": 0.0006590482280444121,
      "clip_ratio/low_mean": 0.00024789660892565735,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009069448442460271,
      "completions/clipped_ratio": 0.1495535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3133.0,
      "completions/mean_length": 1085.7109375,
      "completions/mean_terminated_length": 556.342529296875,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 13.447813411078718,
      "grad_norm": 0.2715403139591217,
      "learning_rate": 1e-06,
      "loss": -0.0643,
      "num_tokens": 783649138.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.13842658698558807,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.49075525999069214,
      "step": 1439
    },
    {
      "clip_ratio/high_max": 0.001605607907549711,
      "clip_ratio/high_mean": 0.0004410829133121297,
      "clip_ratio/low_mean": 0.00031743177214593743,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007585147104691714,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2544.0,
      "completions/mean_length": 927.935302734375,
      "completions/mean_terminated_length": 552.197265625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 13.457142857142857,
      "grad_norm": 0.19035007059574127,
      "learning_rate": 1e-06,
      "loss": -0.0495,
      "num_tokens": 784174800.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.09517853707075119,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 1440
    },
    {
      "clip_ratio/high_max": 0.0015615803604305256,
      "clip_ratio/high_mean": 0.0005633133860101225,
      "clip_ratio/low_mean": 0.0002843796905835916,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008476930634060409,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4081.0,
      "completions/mean_length": 1000.1484985351562,
      "completions/mean_terminated_length": 606.8389892578125,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 13.466472303206997,
      "grad_norm": 0.2348775863647461,
      "learning_rate": 1e-06,
      "loss": -0.0628,
      "num_tokens": 784748941.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.12291326373815536,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 1441
    },
    {
      "clip_ratio/high_max": 0.0015513555008510593,
      "clip_ratio/high_mean": 0.00046216016926337034,
      "clip_ratio/low_mean": 0.00034409974614391103,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008062599245022284,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4082.0,
      "completions/mean_length": 928.7332763671875,
      "completions/mean_terminated_length": 570.6943969726562,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 13.475801749271136,
      "grad_norm": 0.21865294873714447,
      "learning_rate": 1e-06,
      "loss": -0.0197,
      "num_tokens": 785295086.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.10220808535814285,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 1442
    },
    {
      "clip_ratio/high_max": 0.003610143525293097,
      "clip_ratio/high_mean": 0.000933545805310132,
      "clip_ratio/low_mean": 0.0006267401413424523,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0015602859421051107,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3064.0,
      "completions/mean_length": 886.2332763671875,
      "completions/mean_terminated_length": 527.8225708007812,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 13.485131195335278,
      "grad_norm": 0.5562987923622131,
      "learning_rate": 1e-06,
      "loss": -0.0486,
      "num_tokens": 785820327.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.14458990097045898,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219160199165344,
      "step": 1443
    },
    {
      "clip_ratio/high_max": 0.0020020690317323897,
      "clip_ratio/high_mean": 0.0007748012221782119,
      "clip_ratio/low_mean": 0.0002964324219192349,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010712336443248205,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1797.0,
      "completions/mean_length": 962.5156860351562,
      "completions/mean_terminated_length": 528.5260009765625,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 13.494460641399417,
      "grad_norm": 0.3047040104866028,
      "learning_rate": 1e-06,
      "loss": -0.0885,
      "num_tokens": 786324365.0,
      "reward": 0.7142857313156128,
      "reward_std": 0.17130544781684875,
      "rewards/verify_math_reward/mean": 0.7142857313156128,
      "rewards/verify_math_reward/std": 0.4520062506198883,
      "step": 1444
    },
    {
      "clip_ratio/high_max": 0.0011822478027170291,
      "clip_ratio/high_mean": 0.00035488656817506126,
      "clip_ratio/low_mean": 0.0002599489271233324,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006148354896140518,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2685.0,
      "completions/mean_length": 945.638427734375,
      "completions/mean_terminated_length": 545.4037475585938,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 13.503790087463557,
      "grad_norm": 0.23118868470191956,
      "learning_rate": 1e-06,
      "loss": -0.0322,
      "num_tokens": 786848905.0,
      "reward": 0.645089328289032,
      "reward_std": 0.10517682880163193,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 1445
    },
    {
      "clip_ratio/high_max": 0.0020006159065815154,
      "clip_ratio/high_mean": 0.0007312193247344112,
      "clip_ratio/low_mean": 0.0004801676841452718,
      "clip_ratio/low_min": 2.8682881747954525e-05,
      "clip_ratio/region_mean": 0.0012113870034227148,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3201.0,
      "completions/mean_length": 925.33935546875,
      "completions/mean_terminated_length": 558.1270141601562,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 13.513119533527696,
      "grad_norm": 0.2648131251335144,
      "learning_rate": 1e-06,
      "loss": -0.0608,
      "num_tokens": 787380945.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.13523778319358826,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 1446
    },
    {
      "clip_ratio/high_max": 0.0020254668124835007,
      "clip_ratio/high_mean": 0.0006712548993164091,
      "clip_ratio/low_mean": 0.0005241719172772719,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001195426801132271,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2691.0,
      "completions/mean_length": 992.0904541015625,
      "completions/mean_terminated_length": 530.4833374023438,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 13.522448979591836,
      "grad_norm": 0.4312402307987213,
      "learning_rate": 1e-06,
      "loss": -0.0528,
      "num_tokens": 787896666.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.1471807062625885,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 1447
    },
    {
      "clip_ratio/high_max": 0.0014023078638274455,
      "clip_ratio/high_mean": 0.0004600453930834192,
      "clip_ratio/low_mean": 0.0003318258569606769,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007918712526588934,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2888.0,
      "completions/mean_length": 903.5982666015625,
      "completions/mean_terminated_length": 516.0350341796875,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 13.531778425655977,
      "grad_norm": 0.22489960491657257,
      "learning_rate": 1e-06,
      "loss": -0.0313,
      "num_tokens": 788389978.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.10633868724107742,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 1448
    },
    {
      "clip_ratio/high_max": 0.0013727660625590943,
      "clip_ratio/high_mean": 0.0005316767715157766,
      "clip_ratio/low_mean": 0.00035899911131309636,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008906758848752361,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3403.0,
      "completions/mean_length": 772.3114013671875,
      "completions/mean_terminated_length": 525.2266235351562,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 13.541107871720117,
      "grad_norm": 0.2135280966758728,
      "learning_rate": 1e-06,
      "loss": -0.0341,
      "num_tokens": 788917697.0,
      "reward": 0.7187500596046448,
      "reward_std": 0.1278284639120102,
      "rewards/verify_math_reward/mean": 0.71875,
      "rewards/verify_math_reward/std": 0.4498603343963623,
      "step": 1449
    },
    {
      "clip_ratio/high_max": 0.00202522271865746,
      "clip_ratio/high_mean": 0.0006648302296525799,
      "clip_ratio/low_mean": 0.00037332416786739486,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010381543943367433,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2742.0,
      "completions/mean_length": 945.4933471679688,
      "completions/mean_terminated_length": 504.58270263671875,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 13.550437317784256,
      "grad_norm": 0.45835256576538086,
      "learning_rate": 1e-06,
      "loss": -0.0482,
      "num_tokens": 789414787.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.13846825063228607,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 1450
    },
    {
      "clip_ratio/high_max": 0.001945218289620243,
      "clip_ratio/high_mean": 0.0008428751607425511,
      "clip_ratio/low_mean": 0.00034109882290067617,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011839740018331213,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3608.0,
      "completions/mean_length": 900.0379638671875,
      "completions/mean_terminated_length": 565.07275390625,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 13.559766763848396,
      "grad_norm": 0.25809982419013977,
      "learning_rate": 1e-06,
      "loss": -0.0665,
      "num_tokens": 789953325.0,
      "reward": 0.699776828289032,
      "reward_std": 0.1710016429424286,
      "rewards/verify_math_reward/mean": 0.6997767686843872,
      "rewards/verify_math_reward/std": 0.4586109220981598,
      "step": 1451
    },
    {
      "clip_ratio/high_max": 0.002498020290659042,
      "clip_ratio/high_mean": 0.0008431602927885251,
      "clip_ratio/low_mean": 0.0004155889710091287,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012587492637976538,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3509.0,
      "completions/mean_length": 999.2645263671875,
      "completions/mean_terminated_length": 534.1578979492188,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 13.569096209912537,
      "grad_norm": 0.3384162187576294,
      "learning_rate": 1e-06,
      "loss": -0.0269,
      "num_tokens": 790452666.0,
      "reward": 0.65625,
      "reward_std": 0.13760244846343994,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 1452
    },
    {
      "clip_ratio/high_max": 0.0018312933134438936,
      "clip_ratio/high_mean": 0.000621519347987487,
      "clip_ratio/low_mean": 0.00027285034411761444,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008943696666392498,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2930.0,
      "completions/mean_length": 882.2277221679688,
      "completions/mean_terminated_length": 501.06866455078125,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 13.578425655976677,
      "grad_norm": 0.24517937004566193,
      "learning_rate": 1e-06,
      "loss": -0.0516,
      "num_tokens": 790940270.0,
      "reward": 0.6930803656578064,
      "reward_std": 0.12678678333759308,
      "rewards/verify_math_reward/mean": 0.6930803656578064,
      "rewards/verify_math_reward/std": 0.46147337555885315,
      "step": 1453
    },
    {
      "clip_ratio/high_max": 0.0017484512136434205,
      "clip_ratio/high_mean": 0.0005834791468259937,
      "clip_ratio/low_mean": 0.0004431280758581124,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00102660721859138,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1966.0,
      "completions/mean_length": 914.5904541015625,
      "completions/mean_terminated_length": 510.41131591796875,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 13.587755102040816,
      "grad_norm": 0.2504347562789917,
      "learning_rate": 1e-06,
      "loss": -0.0558,
      "num_tokens": 791439559.0,
      "reward": 0.652901828289032,
      "reward_std": 0.12851063907146454,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631320357322693,
      "step": 1454
    },
    {
      "clip_ratio/high_max": 0.0018982372566824779,
      "clip_ratio/high_mean": 0.0007249323934956919,
      "clip_ratio/low_mean": 0.000293872105430637,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001018804519844707,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3704.0,
      "completions/mean_length": 965.7522583007812,
      "completions/mean_terminated_length": 581.3358154296875,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 13.597084548104956,
      "grad_norm": 0.3169480860233307,
      "learning_rate": 1e-06,
      "loss": -0.0408,
      "num_tokens": 791999737.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.11948806047439575,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.47942501306533813,
      "step": 1455
    },
    {
      "clip_ratio/high_max": 0.002013426914345473,
      "clip_ratio/high_mean": 0.000531723276253615,
      "clip_ratio/low_mean": 0.00028123971742388676,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008129629859467968,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3913.0,
      "completions/mean_length": 1059.10498046875,
      "completions/mean_terminated_length": 589.48193359375,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 13.606413994169095,
      "grad_norm": 0.1967323124408722,
      "learning_rate": 1e-06,
      "loss": -0.0476,
      "num_tokens": 792553551.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.1210271567106247,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 1456
    },
    {
      "clip_ratio/high_max": 0.001914564665639773,
      "clip_ratio/high_mean": 0.000788834149716422,
      "clip_ratio/low_mean": 0.0003496232891393447,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011384574318071827,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3653.0,
      "completions/mean_length": 993.4688110351562,
      "completions/mean_terminated_length": 545.7215576171875,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 13.615743440233237,
      "grad_norm": 0.4443165957927704,
      "learning_rate": 1e-06,
      "loss": -0.0684,
      "num_tokens": 793067347.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.14812570810317993,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 1457
    },
    {
      "clip_ratio/high_max": 0.001701616663922323,
      "clip_ratio/high_mean": 0.0005171892580619897,
      "clip_ratio/low_mean": 0.00031081757128959,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008280068286694586,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2139.0,
      "completions/mean_length": 739.0424194335938,
      "completions/mean_terminated_length": 485.15484619140625,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 13.625072886297376,
      "grad_norm": 0.421511709690094,
      "learning_rate": 1e-06,
      "loss": -0.0526,
      "num_tokens": 793560577.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.09897467494010925,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.4628615975379944,
      "step": 1458
    },
    {
      "clip_ratio/high_max": 0.0023946032124513295,
      "clip_ratio/high_mean": 0.0009514205348750693,
      "clip_ratio/low_mean": 0.00043876720155822113,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001390187750075711,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2651.0,
      "completions/mean_length": 1038.01904296875,
      "completions/mean_terminated_length": 556.0116577148438,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 13.634402332361516,
      "grad_norm": 0.29964977502822876,
      "learning_rate": 1e-06,
      "loss": -0.0528,
      "num_tokens": 794086762.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.16183707118034363,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 1459
    },
    {
      "clip_ratio/high_max": 0.0018743944528978318,
      "clip_ratio/high_mean": 0.0007847478591429535,
      "clip_ratio/low_mean": 0.0004296823426557239,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001214430209074635,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2594.0,
      "completions/mean_length": 983.6105346679688,
      "completions/mean_terminated_length": 592.6067504882812,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 13.643731778425655,
      "grad_norm": 0.25965142250061035,
      "learning_rate": 1e-06,
      "loss": -0.052,
      "num_tokens": 794653301.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.15736766159534454,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 1460
    },
    {
      "clip_ratio/high_max": 0.0016970364158623852,
      "clip_ratio/high_mean": 0.0006360034976751194,
      "clip_ratio/low_mean": 0.0003294871667094412,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009654906461946666,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3894.0,
      "completions/mean_length": 1059.421875,
      "completions/mean_terminated_length": 589.847900390625,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 13.653061224489797,
      "grad_norm": 0.2414640635251999,
      "learning_rate": 1e-06,
      "loss": -0.0441,
      "num_tokens": 795191535.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.13827574253082275,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 1461
    },
    {
      "clip_ratio/high_max": 0.002116244228091091,
      "clip_ratio/high_mean": 0.0006606608640140621,
      "clip_ratio/low_mean": 0.00031311290831581573,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000973773785517551,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3703.0,
      "completions/mean_length": 919.14404296875,
      "completions/mean_terminated_length": 524.5281982421875,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 13.662390670553936,
      "grad_norm": 0.26867973804473877,
      "learning_rate": 1e-06,
      "loss": -0.0586,
      "num_tokens": 795700912.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.12050652503967285,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422141790390015,
      "step": 1462
    },
    {
      "clip_ratio/high_max": 0.00176824001755449,
      "clip_ratio/high_mean": 0.0006499588116639643,
      "clip_ratio/low_mean": 0.0004712876520898135,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011212464669370092,
      "completions/clipped_ratio": 0.1506696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4068.0,
      "completions/mean_length": 1105.4765625,
      "completions/mean_terminated_length": 574.9631958007812,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 13.671720116618076,
      "grad_norm": 0.23908397555351257,
      "learning_rate": 1e-06,
      "loss": -0.0491,
      "num_tokens": 796225899.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.146812304854393,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 1463
    },
    {
      "clip_ratio/high_max": 0.0017086016478060628,
      "clip_ratio/high_mean": 0.0006271425013437693,
      "clip_ratio/low_mean": 0.0004496029623624054,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010767454623419326,
      "completions/clipped_ratio": 0.1450892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3657.0,
      "completions/mean_length": 1058.10498046875,
      "completions/mean_terminated_length": 542.5352783203125,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 13.681049562682215,
      "grad_norm": 0.25820058584213257,
      "learning_rate": 1e-06,
      "loss": -0.0462,
      "num_tokens": 796726169.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.13493607938289642,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 1464
    },
    {
      "clip_ratio/high_max": 0.0024719912617001683,
      "clip_ratio/high_mean": 0.0008474741189274937,
      "clip_ratio/low_mean": 0.0005479869960254291,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013954611058579758,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3713.0,
      "completions/mean_length": 865.1517944335938,
      "completions/mean_terminated_length": 539.6854858398438,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 13.690379008746355,
      "grad_norm": 0.30524924397468567,
      "learning_rate": 1e-06,
      "loss": -0.0546,
      "num_tokens": 797261913.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.17325612902641296,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 1465
    },
    {
      "clip_ratio/high_max": 0.0018566695907793473,
      "clip_ratio/high_mean": 0.0006630695525018382,
      "clip_ratio/low_mean": 0.0004151960770286678,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010782656318042427,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3872.0,
      "completions/mean_length": 880.1272583007812,
      "completions/mean_terminated_length": 529.8836669921875,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 13.699708454810496,
      "grad_norm": 0.3109317719936371,
      "learning_rate": 1e-06,
      "loss": -0.0249,
      "num_tokens": 797768427.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.14602065086364746,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 1466
    },
    {
      "clip_ratio/high_max": 0.0019457281050563324,
      "clip_ratio/high_mean": 0.000726408077753149,
      "clip_ratio/low_mean": 0.00034779537486429035,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010742034355644137,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3743.0,
      "completions/mean_length": 953.927490234375,
      "completions/mean_terminated_length": 572.474365234375,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 13.709037900874636,
      "grad_norm": 0.29716986417770386,
      "learning_rate": 1e-06,
      "loss": -0.0491,
      "num_tokens": 798309818.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.14101991057395935,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179922461509705,
      "step": 1467
    },
    {
      "clip_ratio/high_max": 0.0018728079685388366,
      "clip_ratio/high_mean": 0.0005517528716154629,
      "clip_ratio/low_mean": 0.00037528320081037236,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009270360696973512,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2096.0,
      "completions/mean_length": 919.5792846679688,
      "completions/mean_terminated_length": 507.0050354003906,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 13.718367346938775,
      "grad_norm": 0.9936861991882324,
      "learning_rate": 1e-06,
      "loss": -0.0537,
      "num_tokens": 798813393.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.10990727692842484,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 1468
    },
    {
      "clip_ratio/high_max": 0.0024873462971299887,
      "clip_ratio/high_mean": 0.0008032383084355388,
      "clip_ratio/low_mean": 0.0005039778361606295,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013072161236777902,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3358.0,
      "completions/mean_length": 1041.958740234375,
      "completions/mean_terminated_length": 614.54833984375,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 13.727696793002915,
      "grad_norm": 0.41095873713493347,
      "learning_rate": 1e-06,
      "loss": -0.039,
      "num_tokens": 799387036.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.1579303741455078,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 1469
    },
    {
      "clip_ratio/high_max": 0.0015492766797251534,
      "clip_ratio/high_mean": 0.0006564006334883743,
      "clip_ratio/low_mean": 0.0005405517995313858,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011969524166488554,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3884.0,
      "completions/mean_length": 922.7578735351562,
      "completions/mean_terminated_length": 550.8316650390625,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 13.737026239067056,
      "grad_norm": 0.30893468856811523,
      "learning_rate": 1e-06,
      "loss": -0.0447,
      "num_tokens": 799908515.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.14489158987998962,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.4600565433502197,
      "step": 1470
    },
    {
      "clip_ratio/high_max": 0.0026250950031680986,
      "clip_ratio/high_mean": 0.000864764811922214,
      "clip_ratio/low_mean": 0.0003525011343299411,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001217265937157208,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3280.0,
      "completions/mean_length": 1033.6473388671875,
      "completions/mean_terminated_length": 527.9011840820312,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 13.746355685131196,
      "grad_norm": 5.366886138916016,
      "learning_rate": 1e-06,
      "loss": -0.0506,
      "num_tokens": 800402807.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.14913460612297058,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.4664256274700165,
      "step": 1471
    },
    {
      "clip_ratio/high_max": 0.0016081378271337599,
      "clip_ratio/high_mean": 0.0005036907032263116,
      "clip_ratio/low_mean": 0.0002505595759885182,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007542502844444243,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3582.0,
      "completions/mean_length": 969.93310546875,
      "completions/mean_terminated_length": 545.9923706054688,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 13.755685131195335,
      "grad_norm": 0.2958771586418152,
      "learning_rate": 1e-06,
      "loss": -0.025,
      "num_tokens": 800932339.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.10257647186517715,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 1472
    },
    {
      "clip_ratio/high_max": 0.0016386216011596844,
      "clip_ratio/high_mean": 0.0006217188465598156,
      "clip_ratio/low_mean": 0.0003549203192960704,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009766391649463912,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4064.0,
      "completions/mean_length": 1005.3795166015625,
      "completions/mean_terminated_length": 581.7918701171875,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 13.765014577259475,
      "grad_norm": 0.2535308003425598,
      "learning_rate": 1e-06,
      "loss": -0.0433,
      "num_tokens": 801474783.0,
      "reward": 0.640625,
      "reward_std": 0.13275183737277985,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 1473
    },
    {
      "clip_ratio/high_max": 0.0018186428060289472,
      "clip_ratio/high_mean": 0.0005990914196445374,
      "clip_ratio/low_mean": 0.00041964480942624505,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010187362277065404,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3347.0,
      "completions/mean_length": 995.4129638671875,
      "completions/mean_terminated_length": 561.488525390625,
      "completions/min_length": 183.0,
      "completions/min_terminated_length": 183.0,
      "epoch": 13.774344023323614,
      "grad_norm": 0.39022114872932434,
      "learning_rate": 1e-06,
      "loss": -0.036,
      "num_tokens": 802014017.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.15770283341407776,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.4876568913459778,
      "step": 1474
    },
    {
      "clip_ratio/high_max": 0.0014813031448284164,
      "clip_ratio/high_mean": 0.0005341916303223115,
      "clip_ratio/low_mean": 0.00038841452828819456,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009226061210938497,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2462.0,
      "completions/mean_length": 1058.1373291015625,
      "completions/mean_terminated_length": 583.8387451171875,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 13.783673469387756,
      "grad_norm": 0.2332642674446106,
      "learning_rate": 1e-06,
      "loss": -0.0539,
      "num_tokens": 802560236.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.12828311324119568,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.48841196298599243,
      "step": 1475
    },
    {
      "clip_ratio/high_max": 0.0019524216404533945,
      "clip_ratio/high_mean": 0.0007695368740314734,
      "clip_ratio/low_mean": 0.00037678508579119807,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011463219507277245,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2047.0,
      "completions/mean_length": 801.8939819335938,
      "completions/mean_terminated_length": 500.9707946777344,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 13.793002915451895,
      "grad_norm": 0.5175551772117615,
      "learning_rate": 1e-06,
      "loss": -0.044,
      "num_tokens": 803068061.0,
      "reward": 0.7098214626312256,
      "reward_std": 0.14635835587978363,
      "rewards/verify_math_reward/mean": 0.7098214030265808,
      "rewards/verify_math_reward/std": 0.454098105430603,
      "step": 1476
    },
    {
      "clip_ratio/high_max": 0.001574541403897456,
      "clip_ratio/high_mean": 0.0005633783980556473,
      "clip_ratio/low_mean": 0.0003890172697538219,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009523956305201864,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3808.0,
      "completions/mean_length": 1013.9006958007812,
      "completions/mean_terminated_length": 582.5635986328125,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 13.802332361516035,
      "grad_norm": 0.24361242353916168,
      "learning_rate": 1e-06,
      "loss": -0.0465,
      "num_tokens": 803621268.0,
      "reward": 0.613839328289032,
      "reward_std": 0.13673411309719086,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 1477
    },
    {
      "clip_ratio/high_max": 0.0019728389888769016,
      "clip_ratio/high_mean": 0.0006432071022572927,
      "clip_ratio/low_mean": 0.000295518333587097,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009387254485773155,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3161.0,
      "completions/mean_length": 962.2879638671875,
      "completions/mean_terminated_length": 550.7904052734375,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 13.811661807580174,
      "grad_norm": 0.257691890001297,
      "learning_rate": 1e-06,
      "loss": -0.0352,
      "num_tokens": 804135934.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.12106994539499283,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 1478
    },
    {
      "clip_ratio/high_max": 0.0021615647201542743,
      "clip_ratio/high_mean": 0.0008199662770493887,
      "clip_ratio/low_mean": 0.0003965479390899418,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012165141852165107,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1740.0,
      "completions/mean_length": 1011.2366333007812,
      "completions/mean_terminated_length": 579.5267333984375,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 13.820991253644316,
      "grad_norm": 5.130914688110352,
      "learning_rate": 1e-06,
      "loss": -0.044,
      "num_tokens": 804678978.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.15631458163261414,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 1479
    },
    {
      "clip_ratio/high_max": 0.00164507688532467,
      "clip_ratio/high_mean": 0.0005966942408122122,
      "clip_ratio/low_mean": 0.00026963624486597837,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008663305070513161,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2364.0,
      "completions/mean_length": 956.6652221679688,
      "completions/mean_terminated_length": 512.7592163085938,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 13.830320699708455,
      "grad_norm": 0.19591157138347626,
      "learning_rate": 1e-06,
      "loss": -0.0542,
      "num_tokens": 805165902.0,
      "reward": 0.6975446939468384,
      "reward_std": 0.132416233420372,
      "rewards/verify_math_reward/mean": 0.6975446343421936,
      "rewards/verify_math_reward/std": 0.45957788825035095,
      "step": 1480
    },
    {
      "clip_ratio/high_max": 0.0014007445561219356,
      "clip_ratio/high_mean": 0.000502886847471018,
      "clip_ratio/low_mean": 0.0003058118484204897,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008086986745183822,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3935.0,
      "completions/mean_length": 904.5100708007812,
      "completions/mean_terminated_length": 548.1401977539062,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 13.839650145772595,
      "grad_norm": 0.3516015112400055,
      "learning_rate": 1e-06,
      "loss": -0.0217,
      "num_tokens": 805690495.0,
      "reward": 0.7042410969734192,
      "reward_std": 0.0993858352303505,
      "rewards/verify_math_reward/mean": 0.7042410969734192,
      "rewards/verify_math_reward/std": 0.45663803815841675,
      "step": 1481
    },
    {
      "clip_ratio/high_max": 0.002013493052800186,
      "clip_ratio/high_mean": 0.0006001068650220986,
      "clip_ratio/low_mean": 0.0003182639659371489,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009183708280033898,
      "completions/clipped_ratio": 0.1964285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4046.0,
      "completions/mean_length": 1311.4888916015625,
      "completions/mean_terminated_length": 630.83056640625,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 13.848979591836734,
      "grad_norm": 0.35657399892807007,
      "learning_rate": 1e-06,
      "loss": -0.0466,
      "num_tokens": 806244477.0,
      "reward": 0.543526828289032,
      "reward_std": 0.11347699910402298,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 1482
    },
    {
      "clip_ratio/high_max": 0.0021818411951244343,
      "clip_ratio/high_mean": 0.0006567358777829213,
      "clip_ratio/low_mean": 0.0006275751775319804,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012843110416724812,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3680.0,
      "completions/mean_length": 1001.90185546875,
      "completions/mean_terminated_length": 555.3716430664062,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 13.858309037900874,
      "grad_norm": 0.5369406342506409,
      "learning_rate": 1e-06,
      "loss": -0.0458,
      "num_tokens": 806772429.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.15285293757915497,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 1483
    },
    {
      "clip_ratio/high_max": 0.0016363545873900875,
      "clip_ratio/high_mean": 0.0005250055328360759,
      "clip_ratio/low_mean": 0.000411384296057804,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009363898097944912,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3075.0,
      "completions/mean_length": 908.2433471679688,
      "completions/mean_terminated_length": 525.7124633789062,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 13.867638483965015,
      "grad_norm": 1.6832938194274902,
      "learning_rate": 1e-06,
      "loss": -0.0408,
      "num_tokens": 807282567.0,
      "reward": 0.6640625,
      "reward_std": 0.12215510755777359,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 1484
    },
    {
      "clip_ratio/high_max": 0.002052319818176329,
      "clip_ratio/high_mean": 0.0008283975366794039,
      "clip_ratio/low_mean": 0.0003622072408688837,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011906047984666657,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3848.0,
      "completions/mean_length": 976.26123046875,
      "completions/mean_terminated_length": 539.656494140625,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 13.876967930029155,
      "grad_norm": 0.24110916256904602,
      "learning_rate": 1e-06,
      "loss": -0.0974,
      "num_tokens": 807788137.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.15680311620235443,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 1485
    },
    {
      "clip_ratio/high_max": 0.0017715245303406846,
      "clip_ratio/high_mean": 0.0005689511453965679,
      "clip_ratio/low_mean": 0.0002710913105374857,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008400424503633985,
      "completions/clipped_ratio": 0.1551339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2613.0,
      "completions/mean_length": 1114.954345703125,
      "completions/mean_terminated_length": 567.5759887695312,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 13.886297376093294,
      "grad_norm": 0.23268258571624756,
      "learning_rate": 1e-06,
      "loss": -0.0636,
      "num_tokens": 808304704.0,
      "reward": 0.6015625,
      "reward_std": 0.11674319207668304,
      "rewards/verify_math_reward/mean": 0.6015625,
      "rewards/verify_math_reward/std": 0.48984986543655396,
      "step": 1486
    },
    {
      "clip_ratio/high_max": 0.0015273608551069628,
      "clip_ratio/high_mean": 0.0005877606245121569,
      "clip_ratio/low_mean": 0.0004931669473080547,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010809276063810103,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3066.0,
      "completions/mean_length": 937.0725708007812,
      "completions/mean_terminated_length": 522.263916015625,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 13.895626822157434,
      "grad_norm": 0.2541758716106415,
      "learning_rate": 1e-06,
      "loss": -0.0398,
      "num_tokens": 808807705.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.12569162249565125,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1487
    },
    {
      "clip_ratio/high_max": 0.0018329851263843011,
      "clip_ratio/high_mean": 0.00056038483944576,
      "clip_ratio/low_mean": 0.00028137042909293086,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008417552562605124,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3829.0,
      "completions/mean_length": 847.9230346679688,
      "completions/mean_terminated_length": 533.8494873046875,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 13.904956268221575,
      "grad_norm": 0.20590364933013916,
      "learning_rate": 1e-06,
      "loss": -0.037,
      "num_tokens": 809321596.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.11039513349533081,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 1488
    },
    {
      "clip_ratio/high_max": 0.00229115974434535,
      "clip_ratio/high_mean": 0.000901408802747028,
      "clip_ratio/low_mean": 0.00042819570444407873,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001329604521743022,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2226.0,
      "completions/mean_length": 902.654052734375,
      "completions/mean_terminated_length": 519.4525146484375,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 13.914285714285715,
      "grad_norm": 0.36295849084854126,
      "learning_rate": 1e-06,
      "loss": -0.0635,
      "num_tokens": 809832070.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.15139050781726837,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 1489
    },
    {
      "clip_ratio/high_max": 0.002087779728753958,
      "clip_ratio/high_mean": 0.0007416142852889607,
      "clip_ratio/low_mean": 0.0004565545327750442,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011981688330706675,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2305.0,
      "completions/mean_length": 932.2199096679688,
      "completions/mean_terminated_length": 503.1647644042969,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 13.923615160349854,
      "grad_norm": 0.3551120460033417,
      "learning_rate": 1e-06,
      "loss": -0.0909,
      "num_tokens": 810317467.0,
      "reward": 0.660714328289032,
      "reward_std": 0.1403031200170517,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 1490
    },
    {
      "clip_ratio/high_max": 0.0013714795095438603,
      "clip_ratio/high_mean": 0.00045341820987232495,
      "clip_ratio/low_mean": 0.0003700997604028089,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008235179739131127,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4037.0,
      "completions/mean_length": 973.0803833007812,
      "completions/mean_terminated_length": 580.7537841796875,
      "completions/min_length": 200.0,
      "completions/min_terminated_length": 200.0,
      "epoch": 13.932944606413994,
      "grad_norm": 0.29760563373565674,
      "learning_rate": 1e-06,
      "loss": -0.0529,
      "num_tokens": 810866995.0,
      "reward": 0.613839328289032,
      "reward_std": 0.13057151436805725,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 1491
    },
    {
      "clip_ratio/high_max": 0.001929035919602029,
      "clip_ratio/high_mean": 0.0006863545404485194,
      "clip_ratio/low_mean": 0.00037580204616460833,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010621565670589916,
      "completions/clipped_ratio": 0.1450892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3876.0,
      "completions/mean_length": 1046.118408203125,
      "completions/mean_terminated_length": 528.5143432617188,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 13.942274052478133,
      "grad_norm": 0.24002155661582947,
      "learning_rate": 1e-06,
      "loss": -0.054,
      "num_tokens": 811361005.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.12767621874809265,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 1492
    },
    {
      "clip_ratio/high_max": 0.0014654858168796636,
      "clip_ratio/high_mean": 0.0005337569509720197,
      "clip_ratio/low_mean": 0.00028353541347314604,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000817292378997081,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2608.0,
      "completions/mean_length": 818.2433471679688,
      "completions/mean_terminated_length": 505.69439697265625,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 13.951603498542275,
      "grad_norm": 0.23312684893608093,
      "learning_rate": 1e-06,
      "loss": -0.0397,
      "num_tokens": 811861967.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.11945484578609467,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.4610042870044708,
      "step": 1493
    },
    {
      "clip_ratio/high_max": 0.002324598935956601,
      "clip_ratio/high_mean": 0.0007541353406850249,
      "clip_ratio/low_mean": 0.00045425827102008043,
      "clip_ratio/low_min": 1.3031693015363999e-05,
      "clip_ratio/region_mean": 0.0012083935907867271,
      "completions/clipped_ratio": 0.1428571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3613.0,
      "completions/mean_length": 1113.310302734375,
      "completions/mean_terminated_length": 616.1953125,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 13.960932944606414,
      "grad_norm": 0.3092258870601654,
      "learning_rate": 1e-06,
      "loss": -0.0832,
      "num_tokens": 812431469.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.14755865931510925,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 1494
    },
    {
      "clip_ratio/high_max": 0.0015647552099835593,
      "clip_ratio/high_mean": 0.0005115541280247271,
      "clip_ratio/low_mean": 0.00038354821617758716,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008951023701229133,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3918.0,
      "completions/mean_length": 1059.239990234375,
      "completions/mean_terminated_length": 562.3155517578125,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 13.970262390670554,
      "grad_norm": 0.27657631039619446,
      "learning_rate": 1e-06,
      "loss": -0.0487,
      "num_tokens": 812945540.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.11997589468955994,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161845445632935,
      "step": 1495
    },
    {
      "clip_ratio/high_max": 0.0020840627112193033,
      "clip_ratio/high_mean": 0.0008205575250030961,
      "clip_ratio/low_mean": 0.00040433476669932134,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012248922867001966,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2438.0,
      "completions/mean_length": 896.6016235351562,
      "completions/mean_terminated_length": 503.6929931640625,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 13.979591836734693,
      "grad_norm": 0.2688261568546295,
      "learning_rate": 1e-06,
      "loss": -0.036,
      "num_tokens": 813432687.0,
      "reward": 0.6640625,
      "reward_std": 0.1401950567960739,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 1496
    },
    {
      "clip_ratio/high_max": 0.0015964990416250657,
      "clip_ratio/high_mean": 0.000543468352589116,
      "clip_ratio/low_mean": 0.0004935717452099198,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001037040121445898,
      "completions/clipped_ratio": 0.1462053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3648.0,
      "completions/mean_length": 1108.9989013671875,
      "completions/mean_terminated_length": 597.4993896484375,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 13.988921282798835,
      "grad_norm": 0.21441441774368286,
      "learning_rate": 1e-06,
      "loss": -0.0758,
      "num_tokens": 813982822.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.13985693454742432,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 1497
    },
    {
      "clip_ratio/high_max": 0.0017150953171949368,
      "clip_ratio/high_mean": 0.0006848354114481481,
      "clip_ratio/low_mean": 0.0002689252323762048,
      "clip_ratio/low_min": 1.4240145901567303e-05,
      "clip_ratio/region_mean": 0.0009537606310914271,
      "completions/clipped_ratio": 0.15909090909090906,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2030.0,
      "completions/mean_length": 1102.0909423828125,
      "completions/mean_terminated_length": 535.6756591796875,
      "completions/min_length": 190.0,
      "completions/min_terminated_length": 190.0,
      "epoch": 13.998250728862974,
      "grad_norm": 0.239691361784935,
      "learning_rate": 1e-06,
      "loss": -0.0797,
      "num_tokens": 814499339.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.13357990980148315,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 1498
    },
    {
      "clip_ratio/high_max": 0.001494333037044271,
      "clip_ratio/high_mean": 0.000578145759391191,
      "clip_ratio/low_mean": 0.0002765736257970275,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008547193610866088,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2152.0,
      "completions/mean_length": 1018.07373046875,
      "completions/mean_terminated_length": 542.1056518554688,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 14.00932944606414,
      "grad_norm": 1.4641883373260498,
      "learning_rate": 1e-06,
      "loss": -0.0358,
      "num_tokens": 815007925.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.11637480556964874,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 1499
    },
    {
      "clip_ratio/high_max": 0.0020549969995045103,
      "clip_ratio/high_mean": 0.0006765435027773492,
      "clip_ratio/low_mean": 0.0003070677812502254,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000983611276751617,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2758.0,
      "completions/mean_length": 870.6529541015625,
      "completions/mean_terminated_length": 541.3739013671875,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 14.018658892128279,
      "grad_norm": 0.24957065284252167,
      "learning_rate": 1e-06,
      "loss": -0.0646,
      "num_tokens": 815543750.0,
      "reward": 0.7410714626312256,
      "reward_std": 0.14895054697990417,
      "rewards/verify_math_reward/mean": 0.7410714030265808,
      "rewards/verify_math_reward/std": 0.43829095363616943,
      "step": 1500
    },
    {
      "clip_ratio/high_max": 0.001968701009900542,
      "clip_ratio/high_mean": 0.000634106368124776,
      "clip_ratio/low_mean": 0.0005157212635822361,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011498276089696446,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3604.0,
      "completions/mean_length": 1011.075927734375,
      "completions/mean_terminated_length": 556.8297119140625,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 14.02798833819242,
      "grad_norm": 0.44304928183555603,
      "learning_rate": 1e-06,
      "loss": -0.0619,
      "num_tokens": 816077162.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.129141166806221,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 1501
    },
    {
      "clip_ratio/high_max": 0.0018333724947297014,
      "clip_ratio/high_mean": 0.0007765739246679004,
      "clip_ratio/low_mean": 0.0004834348496842722,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012600087884493405,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2947.0,
      "completions/mean_length": 930.888427734375,
      "completions/mean_terminated_length": 515.2677001953125,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 14.03731778425656,
      "grad_norm": 0.36276882886886597,
      "learning_rate": 1e-06,
      "loss": -0.0471,
      "num_tokens": 816566382.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.1360626220703125,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 1502
    },
    {
      "clip_ratio/high_max": 0.002207079916843213,
      "clip_ratio/high_mean": 0.000813964232293074,
      "clip_ratio/low_mean": 0.0004805520211448311,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001294516277994262,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2487.0,
      "completions/mean_length": 910.5357666015625,
      "completions/mean_terminated_length": 554.8386840820312,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 14.0466472303207,
      "grad_norm": 0.26648738980293274,
      "learning_rate": 1e-06,
      "loss": -0.0634,
      "num_tokens": 817103662.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.16484086215496063,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 1503
    },
    {
      "clip_ratio/high_max": 0.001593323813722236,
      "clip_ratio/high_mean": 0.000571819620745373,
      "clip_ratio/low_mean": 0.0004090447291673627,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000980864360826672,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3443.0,
      "completions/mean_length": 897.1406860351562,
      "completions/mean_terminated_length": 526.6624755859375,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 14.055976676384839,
      "grad_norm": 0.28196415305137634,
      "learning_rate": 1e-06,
      "loss": -0.0329,
      "num_tokens": 817611732.0,
      "reward": 0.65625,
      "reward_std": 0.13932742178440094,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 1504
    },
    {
      "clip_ratio/high_max": 0.0013122446544002742,
      "clip_ratio/high_mean": 0.0003747532045963453,
      "clip_ratio/low_mean": 0.0003058970505662728,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006806502588005969,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3077.0,
      "completions/mean_length": 932.4967041015625,
      "completions/mean_terminated_length": 566.11328125,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 14.06530612244898,
      "grad_norm": 0.20008648931980133,
      "learning_rate": 1e-06,
      "loss": -0.0428,
      "num_tokens": 818153761.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.11794712394475937,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 1505
    },
    {
      "clip_ratio/high_max": 0.0018449998533469625,
      "clip_ratio/high_mean": 0.0005978632216283586,
      "clip_ratio/low_mean": 0.0003785571439038904,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009764203714439645,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4040.0,
      "completions/mean_length": 1013.8638916015625,
      "completions/mean_terminated_length": 518.8056640625,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 14.07463556851312,
      "grad_norm": 0.24399596452713013,
      "learning_rate": 1e-06,
      "loss": -0.0437,
      "num_tokens": 818649047.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.11899950355291367,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1506
    },
    {
      "clip_ratio/high_max": 0.0029248369501146954,
      "clip_ratio/high_mean": 0.0008199370113288751,
      "clip_ratio/low_mean": 0.00039066135104803834,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001210598351462977,
      "completions/clipped_ratio": 0.1372767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3702.0,
      "completions/mean_length": 1035.27685546875,
      "completions/mean_terminated_length": 548.2535400390625,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 14.08396501457726,
      "grad_norm": 0.4364930987358093,
      "learning_rate": 1e-06,
      "loss": -0.0452,
      "num_tokens": 819161023.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.14365628361701965,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 1507
    },
    {
      "clip_ratio/high_max": 0.0018456281832186505,
      "clip_ratio/high_mean": 0.0006877476062072674,
      "clip_ratio/low_mean": 0.0004106035758013604,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010983512038365006,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2231.0,
      "completions/mean_length": 963.0558471679688,
      "completions/mean_terminated_length": 515.4923095703125,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 14.093294460641399,
      "grad_norm": 0.3371732831001282,
      "learning_rate": 1e-06,
      "loss": -0.0783,
      "num_tokens": 819653073.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.15458819270133972,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 1508
    },
    {
      "clip_ratio/high_max": 0.0016413013872806914,
      "clip_ratio/high_mean": 0.0005821581821692234,
      "clip_ratio/low_mean": 0.00026635393624019343,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000848512137963553,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3787.0,
      "completions/mean_length": 992.4241333007812,
      "completions/mean_terminated_length": 535.4315185546875,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 14.102623906705539,
      "grad_norm": 0.2613990008831024,
      "learning_rate": 1e-06,
      "loss": -0.0325,
      "num_tokens": 820162677.0,
      "reward": 0.6986607313156128,
      "reward_std": 0.11419376730918884,
      "rewards/verify_math_reward/mean": 0.6986607313156128,
      "rewards/verify_math_reward/std": 0.4590960443019867,
      "step": 1509
    },
    {
      "clip_ratio/high_max": 0.00206036433155532,
      "clip_ratio/high_mean": 0.0007325881288124947,
      "clip_ratio/low_mean": 0.00027539572738533025,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001007983857562067,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3854.0,
      "completions/mean_length": 857.6506958007812,
      "completions/mean_terminated_length": 527.0442504882812,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 14.11195335276968,
      "grad_norm": 0.20512865483760834,
      "learning_rate": 1e-06,
      "loss": -0.0719,
      "num_tokens": 820673396.0,
      "reward": 0.731026828289032,
      "reward_std": 0.13568215072155,
      "rewards/verify_math_reward/mean": 0.7310267686843872,
      "rewards/verify_math_reward/std": 0.44367367029190063,
      "step": 1510
    },
    {
      "clip_ratio/high_max": 0.0014188510867825244,
      "clip_ratio/high_mean": 0.0004800527813131339,
      "clip_ratio/low_mean": 0.00029871640936107724,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007787692102283472,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3358.0,
      "completions/mean_length": 1049.77685546875,
      "completions/mean_terminated_length": 578.7113037109375,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 14.12128279883382,
      "grad_norm": 0.24495892226696014,
      "learning_rate": 1e-06,
      "loss": -0.0478,
      "num_tokens": 821206028.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.1256481409072876,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 1511
    },
    {
      "clip_ratio/high_max": 0.0010677577156457119,
      "clip_ratio/high_mean": 0.00040600342754260055,
      "clip_ratio/low_mean": 0.00016483723129567807,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0005708406652047415,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3004.0,
      "completions/mean_length": 754.8917846679688,
      "completions/mean_terminated_length": 506.5119934082031,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 14.130612244897959,
      "grad_norm": 0.18473725020885468,
      "learning_rate": 1e-06,
      "loss": -0.0379,
      "num_tokens": 821708779.0,
      "reward": 0.7533482313156128,
      "reward_std": 0.09130503982305527,
      "rewards/verify_math_reward/mean": 0.7533482313156128,
      "rewards/verify_math_reward/std": 0.4313030242919922,
      "step": 1512
    },
    {
      "clip_ratio/high_max": 0.0018346720971749164,
      "clip_ratio/high_mean": 0.0006204481560416752,
      "clip_ratio/low_mean": 0.0003428258760322933,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009632740475353785,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3574.0,
      "completions/mean_length": 925.114990234375,
      "completions/mean_terminated_length": 553.465087890625,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 14.139941690962099,
      "grad_norm": 1.1437463760375977,
      "learning_rate": 1e-06,
      "loss": -0.0509,
      "num_tokens": 822246506.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.13083434104919434,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 1513
    },
    {
      "clip_ratio/high_max": 0.0012576879689731868,
      "clip_ratio/high_mean": 0.000406599920552253,
      "clip_ratio/low_mean": 0.00036734900686496985,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007739489374216646,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3198.0,
      "completions/mean_length": 1022.2254638671875,
      "completions/mean_terminated_length": 560.5673828125,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 14.14927113702624,
      "grad_norm": 0.848977267742157,
      "learning_rate": 1e-06,
      "loss": -0.0438,
      "num_tokens": 822777356.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.10246770083904266,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 1514
    },
    {
      "clip_ratio/high_max": 0.0016075218627520371,
      "clip_ratio/high_mean": 0.0005811413732317305,
      "clip_ratio/low_mean": 0.00028787522683160205,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008690165977895958,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4086.0,
      "completions/mean_length": 1043.935302734375,
      "completions/mean_terminated_length": 581.0256958007812,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 14.15860058309038,
      "grad_norm": 0.2685943841934204,
      "learning_rate": 1e-06,
      "loss": -0.0506,
      "num_tokens": 823319834.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.10882211476564407,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 1515
    },
    {
      "clip_ratio/high_max": 0.0016328601450368296,
      "clip_ratio/high_mean": 0.0006034293946868274,
      "clip_ratio/low_mean": 0.000265215342551528,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008686447181389667,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3138.0,
      "completions/mean_length": 951.2277221679688,
      "completions/mean_terminated_length": 515.6746826171875,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 14.167930029154519,
      "grad_norm": 0.2632819414138794,
      "learning_rate": 1e-06,
      "loss": -0.0863,
      "num_tokens": 823814934.0,
      "reward": 0.7042410969734192,
      "reward_std": 0.12944427132606506,
      "rewards/verify_math_reward/mean": 0.7042410969734192,
      "rewards/verify_math_reward/std": 0.45663803815841675,
      "step": 1516
    },
    {
      "clip_ratio/high_max": 0.002156028051103931,
      "clip_ratio/high_mean": 0.000844777683596476,
      "clip_ratio/low_mean": 0.0005361340854506125,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013809117663186044,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3776.0,
      "completions/mean_length": 911.6819458007812,
      "completions/mean_terminated_length": 502.61334228515625,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 14.177259475218658,
      "grad_norm": 0.31470048427581787,
      "learning_rate": 1e-06,
      "loss": -0.0537,
      "num_tokens": 824302201.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.17589321732521057,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422141790390015,
      "step": 1517
    },
    {
      "clip_ratio/high_max": 0.0016641835100017488,
      "clip_ratio/high_mean": 0.0005968749319436029,
      "clip_ratio/low_mean": 0.00023741816278288752,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008342931150764343,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2608.0,
      "completions/mean_length": 977.8035888671875,
      "completions/mean_terminated_length": 572.7919311523438,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 14.186588921282798,
      "grad_norm": 0.2105596363544464,
      "learning_rate": 1e-06,
      "loss": -0.0698,
      "num_tokens": 824841865.0,
      "reward": 0.637276828289032,
      "reward_std": 0.12313193827867508,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 1518
    },
    {
      "clip_ratio/high_max": 0.0015158499372773804,
      "clip_ratio/high_mean": 0.0005213018552012727,
      "clip_ratio/low_mean": 0.0002866446910729792,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008079465351329418,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3851.0,
      "completions/mean_length": 979.4576416015625,
      "completions/mean_terminated_length": 520.5557250976562,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 14.19591836734694,
      "grad_norm": 0.2224515825510025,
      "learning_rate": 1e-06,
      "loss": -0.032,
      "num_tokens": 825341411.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.10618714243173599,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 1519
    },
    {
      "clip_ratio/high_max": 0.002123734593624249,
      "clip_ratio/high_mean": 0.0008263798099505948,
      "clip_ratio/low_mean": 0.0006075028886698419,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001433882691344479,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3952.0,
      "completions/mean_length": 803.4475708007812,
      "completions/mean_terminated_length": 511.3985595703125,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 14.205247813411079,
      "grad_norm": 0.5233650207519531,
      "learning_rate": 1e-06,
      "loss": -0.0407,
      "num_tokens": 825851604.0,
      "reward": 0.7087053656578064,
      "reward_std": 0.15526330471038818,
      "rewards/verify_math_reward/mean": 0.7087053656578064,
      "rewards/verify_math_reward/std": 0.45461276173591614,
      "step": 1520
    },
    {
      "clip_ratio/high_max": 0.001779746904503554,
      "clip_ratio/high_mean": 0.0006358881364576519,
      "clip_ratio/low_mean": 0.0006412241928046569,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012771123547281604,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3425.0,
      "completions/mean_length": 954.9464721679688,
      "completions/mean_terminated_length": 524.4467163085938,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 14.214577259475218,
      "grad_norm": 0.5685335993766785,
      "learning_rate": 1e-06,
      "loss": -0.0455,
      "num_tokens": 826350204.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.13023632764816284,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 1521
    },
    {
      "clip_ratio/high_max": 0.0014518308926199097,
      "clip_ratio/high_mean": 0.000579665770601423,
      "clip_ratio/low_mean": 0.0005093620625302719,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001089027819034527,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3067.0,
      "completions/mean_length": 1039.0614013671875,
      "completions/mean_terminated_length": 561.7845458984375,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 14.223906705539358,
      "grad_norm": 0.2760242521762848,
      "learning_rate": 1e-06,
      "loss": -0.0753,
      "num_tokens": 826867123.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.15052077174186707,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1522
    },
    {
      "clip_ratio/high_max": 0.0016179232188733295,
      "clip_ratio/high_mean": 0.0005964905258224462,
      "clip_ratio/low_mean": 0.00032461810633321875,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009211086289724335,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2828.0,
      "completions/mean_length": 1017.26904296875,
      "completions/mean_terminated_length": 581.9324951171875,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 14.2332361516035,
      "grad_norm": 0.22143647074699402,
      "learning_rate": 1e-06,
      "loss": -0.0716,
      "num_tokens": 827412236.0,
      "reward": 0.625,
      "reward_std": 0.1583433598279953,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1523
    },
    {
      "clip_ratio/high_max": 0.001661519267145195,
      "clip_ratio/high_mean": 0.0005118761273479322,
      "clip_ratio/low_mean": 0.0004245043710398022,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009363805038447026,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3926.0,
      "completions/mean_length": 846.8303833007812,
      "completions/mean_terminated_length": 550.01220703125,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 14.242565597667639,
      "grad_norm": 0.21743392944335938,
      "learning_rate": 1e-06,
      "loss": -0.0106,
      "num_tokens": 827952252.0,
      "reward": 0.6640625,
      "reward_std": 0.1083681657910347,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 1524
    },
    {
      "clip_ratio/high_max": 0.0018410912598483264,
      "clip_ratio/high_mean": 0.0005218886806233058,
      "clip_ratio/low_mean": 0.000281309810361563,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008031984616536647,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4015.0,
      "completions/mean_length": 951.0078735351562,
      "completions/mean_terminated_length": 524.5006103515625,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 14.251895043731778,
      "grad_norm": 0.8745452761650085,
      "learning_rate": 1e-06,
      "loss": -0.0363,
      "num_tokens": 828466963.0,
      "reward": 0.652901828289032,
      "reward_std": 0.10952933132648468,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631317377090454,
      "step": 1525
    },
    {
      "clip_ratio/high_max": 0.001884348279418191,
      "clip_ratio/high_mean": 0.0006042873546903138,
      "clip_ratio/low_mean": 0.0005702761377506249,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001174563483800739,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3868.0,
      "completions/mean_length": 968.3058471679688,
      "completions/mean_terminated_length": 530.5877685546875,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 14.261224489795918,
      "grad_norm": 0.30108314752578735,
      "learning_rate": 1e-06,
      "loss": -0.0776,
      "num_tokens": 828974245.0,
      "reward": 0.645089328289032,
      "reward_std": 0.11885752528905869,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 1526
    },
    {
      "clip_ratio/high_max": 0.002004976682655979,
      "clip_ratio/high_mean": 0.0006427386269933777,
      "clip_ratio/low_mean": 0.0003822580783889862,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010249966981064063,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2648.0,
      "completions/mean_length": 954.4944458007812,
      "completions/mean_terminated_length": 537.4804077148438,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 14.270553935860057,
      "grad_norm": 0.3322337567806244,
      "learning_rate": 1e-06,
      "loss": -0.0681,
      "num_tokens": 829497208.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.12467173486948013,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 1527
    },
    {
      "clip_ratio/high_max": 0.0026705336204031482,
      "clip_ratio/high_mean": 0.0007938176222523907,
      "clip_ratio/low_mean": 0.0004538355865406629,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012476531974243699,
      "completions/clipped_ratio": 0.1584821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2441.0,
      "completions/mean_length": 1123.77685546875,
      "completions/mean_terminated_length": 564.021240234375,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 14.279883381924199,
      "grad_norm": 0.6157507300376892,
      "learning_rate": 1e-06,
      "loss": -0.0707,
      "num_tokens": 830015192.0,
      "reward": 0.5770089626312256,
      "reward_std": 0.14815708994865417,
      "rewards/verify_math_reward/mean": 0.5770089030265808,
      "rewards/verify_math_reward/std": 0.4943099319934845,
      "step": 1528
    },
    {
      "clip_ratio/high_max": 0.0017188321180583443,
      "clip_ratio/high_mean": 0.0007196113292593509,
      "clip_ratio/low_mean": 0.0004686434240284143,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011882547878485639,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2788.0,
      "completions/mean_length": 1075.4051513671875,
      "completions/mean_terminated_length": 576.5552368164062,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 14.289212827988338,
      "grad_norm": 0.2834455966949463,
      "learning_rate": 1e-06,
      "loss": -0.0597,
      "num_tokens": 830539283.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.15988357365131378,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 1529
    },
    {
      "clip_ratio/high_max": 0.0013908015898778103,
      "clip_ratio/high_mean": 0.00041138293636322487,
      "clip_ratio/low_mean": 0.00022284483452494896,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006342277683870634,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2688.0,
      "completions/mean_length": 796.5491333007812,
      "completions/mean_terminated_length": 495.1376647949219,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 14.298542274052478,
      "grad_norm": 0.23000116646289825,
      "learning_rate": 1e-06,
      "loss": -0.0612,
      "num_tokens": 831018359.0,
      "reward": 0.7611607313156128,
      "reward_std": 0.08781202137470245,
      "rewards/verify_math_reward/mean": 0.7611607313156128,
      "rewards/verify_math_reward/std": 0.4266124963760376,
      "step": 1530
    },
    {
      "clip_ratio/high_max": 0.0016838195297168568,
      "clip_ratio/high_mean": 0.0005107820786633965,
      "clip_ratio/low_mean": 0.0003008752792084124,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008116573462757515,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3761.0,
      "completions/mean_length": 952.9676513671875,
      "completions/mean_terminated_length": 562.5533447265625,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 14.307871720116617,
      "grad_norm": 0.2157803773880005,
      "learning_rate": 1e-06,
      "loss": -0.0428,
      "num_tokens": 831554218.0,
      "reward": 0.6975446939468384,
      "reward_std": 0.12978056073188782,
      "rewards/verify_math_reward/mean": 0.6975446343421936,
      "rewards/verify_math_reward/std": 0.45957788825035095,
      "step": 1531
    },
    {
      "clip_ratio/high_max": 0.002110756959154969,
      "clip_ratio/high_mean": 0.0007731584628345445,
      "clip_ratio/low_mean": 0.00021015188758610748,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009833103358687367,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2834.0,
      "completions/mean_length": 948.55810546875,
      "completions/mean_terminated_length": 562.0300903320312,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 14.317201166180759,
      "grad_norm": 0.21734938025474548,
      "learning_rate": 1e-06,
      "loss": -0.0595,
      "num_tokens": 832096062.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.12816546857357025,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 1532
    },
    {
      "clip_ratio/high_max": 0.0017332326679024845,
      "clip_ratio/high_mean": 0.0006725101638949127,
      "clip_ratio/low_mean": 0.0002929731977019401,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009654833429522114,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2972.0,
      "completions/mean_length": 923.9710083007812,
      "completions/mean_terminated_length": 525.474853515625,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 14.326530612244898,
      "grad_norm": 0.2405531257390976,
      "learning_rate": 1e-06,
      "loss": -0.0781,
      "num_tokens": 832594404.0,
      "reward": 0.625,
      "reward_std": 0.14897871017456055,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1533
    },
    {
      "clip_ratio/high_max": 0.00227256025391398,
      "clip_ratio/high_mean": 0.0008926759655878413,
      "clip_ratio/low_mean": 0.0004002686164312763,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012929445874760859,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3894.0,
      "completions/mean_length": 1032.454345703125,
      "completions/mean_terminated_length": 572.33251953125,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 14.335860058309038,
      "grad_norm": 1.404003381729126,
      "learning_rate": 1e-06,
      "loss": -0.0594,
      "num_tokens": 833128595.0,
      "reward": 0.652901828289032,
      "reward_std": 0.17442122101783752,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631320357322693,
      "step": 1534
    },
    {
      "clip_ratio/high_max": 0.0018078499851981178,
      "clip_ratio/high_mean": 0.0006283736802288331,
      "clip_ratio/low_mean": 0.0003628180102168699,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009911917004501447,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2368.0,
      "completions/mean_length": 932.7120971679688,
      "completions/mean_terminated_length": 553.1174926757812,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 14.345189504373177,
      "grad_norm": 0.2802864909172058,
      "learning_rate": 1e-06,
      "loss": -0.0855,
      "num_tokens": 833662209.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.16059784591197968,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 1535
    },
    {
      "clip_ratio/high_max": 0.0019803850263997447,
      "clip_ratio/high_mean": 0.0006916289194123237,
      "clip_ratio/low_mean": 0.00033670690902454226,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010283358460583258,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2288.0,
      "completions/mean_length": 849.2433471679688,
      "completions/mean_terminated_length": 539.650390625,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 14.354518950437317,
      "grad_norm": 0.2729128897190094,
      "learning_rate": 1e-06,
      "loss": -0.0445,
      "num_tokens": 834186579.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.13174405694007874,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1536
    },
    {
      "clip_ratio/high_max": 0.001609412760444684,
      "clip_ratio/high_mean": 0.0005967626675555948,
      "clip_ratio/low_mean": 0.0005037324272052501,
      "clip_ratio/low_min": 1.6344141840818338e-05,
      "clip_ratio/region_mean": 0.001100495079299435,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3476.0,
      "completions/mean_length": 940.35498046875,
      "completions/mean_terminated_length": 570.4912719726562,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 14.363848396501458,
      "grad_norm": 0.3091679811477661,
      "learning_rate": 1e-06,
      "loss": -0.0222,
      "num_tokens": 834743345.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.13891583681106567,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 1537
    },
    {
      "clip_ratio/high_max": 0.0018131489341612905,
      "clip_ratio/high_mean": 0.0007738125659670914,
      "clip_ratio/low_mean": 0.0003779981457228132,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011518107094161678,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3420.0,
      "completions/mean_length": 987.7020263671875,
      "completions/mean_terminated_length": 539.1226196289062,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 14.373177842565598,
      "grad_norm": 0.28053566813468933,
      "learning_rate": 1e-06,
      "loss": -0.0514,
      "num_tokens": 835245926.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.1388830542564392,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 1538
    },
    {
      "clip_ratio/high_max": 0.001976877418201184,
      "clip_ratio/high_mean": 0.0006994004324951675,
      "clip_ratio/low_mean": 0.00029225382195363636,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009916542330756783,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2877.0,
      "completions/mean_length": 930.2422485351562,
      "completions/mean_terminated_length": 550.3512573242188,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 14.382507288629737,
      "grad_norm": 0.38584429025650024,
      "learning_rate": 1e-06,
      "loss": -0.0378,
      "num_tokens": 835770783.0,
      "reward": 0.660714328289032,
      "reward_std": 0.12816476821899414,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 1539
    },
    {
      "clip_ratio/high_max": 0.0015576084151689429,
      "clip_ratio/high_mean": 0.000549391243566788,
      "clip_ratio/low_mean": 0.0003806230924965348,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009300143301516073,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3619.0,
      "completions/mean_length": 777.021240234375,
      "completions/mean_terminated_length": 508.78045654296875,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 14.391836734693877,
      "grad_norm": 0.31385764479637146,
      "learning_rate": 1e-06,
      "loss": -0.0272,
      "num_tokens": 836275162.0,
      "reward": 0.7198660969734192,
      "reward_std": 0.12253489345312119,
      "rewards/verify_math_reward/mean": 0.7198660969734192,
      "rewards/verify_math_reward/std": 0.44931527972221375,
      "step": 1540
    },
    {
      "clip_ratio/high_max": 0.0019661447731778026,
      "clip_ratio/high_mean": 0.0006889410569783649,
      "clip_ratio/low_mean": 0.000354509037151729,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010434501182317035,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2722.0,
      "completions/mean_length": 970.3248291015625,
      "completions/mean_terminated_length": 599.6141967773438,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 14.401166180758018,
      "grad_norm": 0.2678011953830719,
      "learning_rate": 1e-06,
      "loss": -0.0306,
      "num_tokens": 836850405.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.13831712305545807,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 1541
    },
    {
      "clip_ratio/high_max": 0.001748717335431138,
      "clip_ratio/high_mean": 0.0006184371759445639,
      "clip_ratio/low_mean": 0.0003229705503144942,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009414077449036995,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2646.0,
      "completions/mean_length": 934.1004638671875,
      "completions/mean_terminated_length": 550.2402954101562,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 14.410495626822158,
      "grad_norm": 0.2840591371059418,
      "learning_rate": 1e-06,
      "loss": -0.0255,
      "num_tokens": 837370727.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.11851094663143158,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.4600565731525421,
      "step": 1542
    },
    {
      "clip_ratio/high_max": 0.0015848389230086468,
      "clip_ratio/high_mean": 0.0005479890296555823,
      "clip_ratio/low_mean": 0.0002825919219731077,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008305809478770243,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3835.0,
      "completions/mean_length": 899.1016235351562,
      "completions/mean_terminated_length": 577.0552978515625,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 14.419825072886297,
      "grad_norm": 0.26387879252433777,
      "learning_rate": 1e-06,
      "loss": -0.0298,
      "num_tokens": 837922114.0,
      "reward": 0.6975446939468384,
      "reward_std": 0.10990910232067108,
      "rewards/verify_math_reward/mean": 0.6975446343421936,
      "rewards/verify_math_reward/std": 0.45957788825035095,
      "step": 1543
    },
    {
      "clip_ratio/high_max": 0.001600129977305187,
      "clip_ratio/high_mean": 0.00046474543796648504,
      "clip_ratio/low_mean": 0.0003207011232007062,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007854465457057813,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3104.0,
      "completions/mean_length": 967.3359985351562,
      "completions/mean_terminated_length": 524.9388427734375,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 14.429154518950437,
      "grad_norm": 0.3005709946155548,
      "learning_rate": 1e-06,
      "loss": -0.0492,
      "num_tokens": 838436319.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.12456366419792175,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 1544
    },
    {
      "clip_ratio/high_max": 0.0013656958890351234,
      "clip_ratio/high_mean": 0.0003816203889073222,
      "clip_ratio/low_mean": 0.00028702770123345545,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006686480865027988,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3195.0,
      "completions/mean_length": 1053.65625,
      "completions/mean_terminated_length": 574.1137084960938,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 14.438483965014576,
      "grad_norm": 0.28173989057540894,
      "learning_rate": 1e-06,
      "loss": -0.0498,
      "num_tokens": 838969747.0,
      "reward": 0.652901828289032,
      "reward_std": 0.11419195681810379,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631320357322693,
      "step": 1545
    },
    {
      "clip_ratio/high_max": 0.0016461078048450872,
      "clip_ratio/high_mean": 0.0006027553740750591,
      "clip_ratio/low_mean": 0.0003256778613831557,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009284332354582148,
      "completions/clipped_ratio": 0.1428571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2469.0,
      "completions/mean_length": 1075.6640625,
      "completions/mean_terminated_length": 572.2747802734375,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 14.447813411078718,
      "grad_norm": 0.2972122132778168,
      "learning_rate": 1e-06,
      "loss": -0.0756,
      "num_tokens": 839492134.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.13106118142604828,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 1546
    },
    {
      "clip_ratio/high_max": 0.002694419275940163,
      "clip_ratio/high_mean": 0.0006896465120007633,
      "clip_ratio/low_mean": 0.0005058231317889295,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001195469649246661,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3206.0,
      "completions/mean_length": 997.536865234375,
      "completions/mean_terminated_length": 527.5899658203125,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 14.457142857142857,
      "grad_norm": 0.37613818049430847,
      "learning_rate": 1e-06,
      "loss": -0.0639,
      "num_tokens": 839996671.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.13403385877609253,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 1547
    },
    {
      "clip_ratio/high_max": 0.0013512234763766173,
      "clip_ratio/high_mean": 0.00044560117385117337,
      "clip_ratio/low_mean": 0.00027892060859358025,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007245217875606613,
      "completions/clipped_ratio": 0.1484375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3773.0,
      "completions/mean_length": 1115.359375,
      "completions/mean_terminated_length": 595.7981567382812,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 14.466472303206997,
      "grad_norm": 0.20257292687892914,
      "learning_rate": 1e-06,
      "loss": -0.0375,
      "num_tokens": 840553721.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.10051559656858444,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 1548
    },
    {
      "clip_ratio/high_max": 0.0017015369103319244,
      "clip_ratio/high_mean": 0.0006325729314085038,
      "clip_ratio/low_mean": 0.0003980436658821418,
      "clip_ratio/low_min": 1.1647409337456338e-05,
      "clip_ratio/region_mean": 0.001030616597745393,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4056.0,
      "completions/mean_length": 966.771240234375,
      "completions/mean_terminated_length": 560.3265991210938,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 14.475801749271136,
      "grad_norm": 0.30500295758247375,
      "learning_rate": 1e-06,
      "loss": -0.0822,
      "num_tokens": 841079340.0,
      "reward": 0.676339328289032,
      "reward_std": 0.12448951601982117,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335687637329,
      "step": 1549
    },
    {
      "clip_ratio/high_max": 0.0016326721852237824,
      "clip_ratio/high_mean": 0.0006306517716438975,
      "clip_ratio/low_mean": 0.00029594478337457986,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009265965600206982,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3088.0,
      "completions/mean_length": 899.1473388671875,
      "completions/mean_terminated_length": 537.7639770507812,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 14.485131195335278,
      "grad_norm": 0.2316146343946457,
      "learning_rate": 1e-06,
      "loss": -0.0439,
      "num_tokens": 841611320.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.13136427104473114,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 1550
    },
    {
      "clip_ratio/high_max": 0.0016231729277933482,
      "clip_ratio/high_mean": 0.0005429292850749334,
      "clip_ratio/low_mean": 0.0005203179716772866,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010632472622091882,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3682.0,
      "completions/mean_length": 896.49560546875,
      "completions/mean_terminated_length": 539.2307739257812,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 14.494460641399417,
      "grad_norm": 0.3035215735435486,
      "learning_rate": 1e-06,
      "loss": -0.0307,
      "num_tokens": 842131084.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.1396312266588211,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 1551
    },
    {
      "clip_ratio/high_max": 0.0016773203897173516,
      "clip_ratio/high_mean": 0.0005758293227700051,
      "clip_ratio/low_mean": 0.0006926721243871725,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012685014662565663,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3566.0,
      "completions/mean_length": 1011.7388916015625,
      "completions/mean_terminated_length": 534.7911987304688,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 14.503790087463557,
      "grad_norm": 0.700936496257782,
      "learning_rate": 1e-06,
      "loss": -0.0243,
      "num_tokens": 842640098.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.11389067769050598,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 1552
    },
    {
      "clip_ratio/high_max": 0.0020729823008878157,
      "clip_ratio/high_mean": 0.0008595442141086096,
      "clip_ratio/low_mean": 0.0005430965966297663,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014026407916389871,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3245.0,
      "completions/mean_length": 936.2991333007812,
      "completions/mean_terminated_length": 552.7058715820312,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 14.513119533527696,
      "grad_norm": 0.33733874559402466,
      "learning_rate": 1e-06,
      "loss": -0.0455,
      "num_tokens": 843170374.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.16773615777492523,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1553
    },
    {
      "clip_ratio/high_max": 0.00203073719603708,
      "clip_ratio/high_mean": 0.0006710498910251772,
      "clip_ratio/low_mean": 0.000389734203054104,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010607840995362494,
      "completions/clipped_ratio": 0.1395089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3944.0,
      "completions/mean_length": 1030.282470703125,
      "completions/mean_terminated_length": 533.2464599609375,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 14.522448979591836,
      "grad_norm": 0.2851661145687103,
      "learning_rate": 1e-06,
      "loss": -0.0968,
      "num_tokens": 843671971.0,
      "reward": 0.629464328289032,
      "reward_std": 0.15277667343616486,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 1554
    },
    {
      "clip_ratio/high_max": 0.001452928758226335,
      "clip_ratio/high_mean": 0.00048307620136256446,
      "clip_ratio/low_mean": 0.00024061604199232534,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007236922465381213,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3337.0,
      "completions/mean_length": 1010.6484985351562,
      "completions/mean_terminated_length": 583.3252563476562,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 14.531778425655977,
      "grad_norm": 0.21543121337890625,
      "learning_rate": 1e-06,
      "loss": -0.0196,
      "num_tokens": 844229576.0,
      "reward": 0.645089328289032,
      "reward_std": 0.10051559656858444,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 1555
    },
    {
      "clip_ratio/high_max": 0.002144656846212456,
      "clip_ratio/high_mean": 0.0007379114322247915,
      "clip_ratio/low_mean": 0.0003275800419260122,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010654914731276222,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4053.0,
      "completions/mean_length": 1059.665283203125,
      "completions/mean_terminated_length": 562.8103637695312,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 14.541107871720117,
      "grad_norm": 0.27838853001594543,
      "learning_rate": 1e-06,
      "loss": -0.0575,
      "num_tokens": 844758284.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.13260169327259064,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.4846802353858948,
      "step": 1556
    },
    {
      "clip_ratio/high_max": 0.002209893886174541,
      "clip_ratio/high_mean": 0.0008417112112510949,
      "clip_ratio/low_mean": 0.00031508226584264776,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011567934780032374,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4035.0,
      "completions/mean_length": 844.6830444335938,
      "completions/mean_terminated_length": 539.003662109375,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 14.550437317784256,
      "grad_norm": 0.49434801936149597,
      "learning_rate": 1e-06,
      "loss": -0.0602,
      "num_tokens": 845286176.0,
      "reward": 0.7165178656578064,
      "reward_std": 0.16394072771072388,
      "rewards/verify_math_reward/mean": 0.7165178656578064,
      "rewards/verify_math_reward/std": 0.4509401023387909,
      "step": 1557
    },
    {
      "clip_ratio/high_max": 0.0014534040456055664,
      "clip_ratio/high_mean": 0.0005090693261990964,
      "clip_ratio/low_mean": 0.0002322719885796687,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007413413059111917,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2905.0,
      "completions/mean_length": 832.8873291015625,
      "completions/mean_terminated_length": 495.3238830566406,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 14.559766763848396,
      "grad_norm": 0.28550097346305847,
      "learning_rate": 1e-06,
      "loss": -0.0169,
      "num_tokens": 845767787.0,
      "reward": 0.7332589626312256,
      "reward_std": 0.09345327317714691,
      "rewards/verify_math_reward/mean": 0.7332589030265808,
      "rewards/verify_math_reward/std": 0.4425029158592224,
      "step": 1558
    },
    {
      "clip_ratio/high_max": 0.0013655441525770584,
      "clip_ratio/high_mean": 0.0004264852286723908,
      "clip_ratio/low_mean": 0.00022893789764566463,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006554231513291597,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2688.0,
      "completions/mean_length": 881.8594360351562,
      "completions/mean_terminated_length": 514.0721435546875,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 14.569096209912537,
      "grad_norm": 0.29426658153533936,
      "learning_rate": 1e-06,
      "loss": -0.0422,
      "num_tokens": 846271461.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.09517784416675568,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 1559
    },
    {
      "clip_ratio/high_max": 0.0016492469512741081,
      "clip_ratio/high_mean": 0.000665633537209942,
      "clip_ratio/low_mean": 0.0003009655340520112,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009665990692155901,
      "completions/clipped_ratio": 0.1607142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3719.0,
      "completions/mean_length": 1140.915283203125,
      "completions/mean_terminated_length": 575.0478515625,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 14.578425655976677,
      "grad_norm": 0.34149521589279175,
      "learning_rate": 1e-06,
      "loss": -0.0457,
      "num_tokens": 846792065.0,
      "reward": 0.609375,
      "reward_std": 0.13809071481227875,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 1560
    },
    {
      "clip_ratio/high_max": 0.0017487114273535553,
      "clip_ratio/high_mean": 0.0007000863224675413,
      "clip_ratio/low_mean": 0.00026969055079462123,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009697768582555,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3973.0,
      "completions/mean_length": 926.3739013671875,
      "completions/mean_terminated_length": 510.16033935546875,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 14.587755102040816,
      "grad_norm": 0.27039656043052673,
      "learning_rate": 1e-06,
      "loss": -0.0628,
      "num_tokens": 847283432.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.12497665733098984,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.4628615975379944,
      "step": 1561
    },
    {
      "clip_ratio/high_max": 0.0025974907548516057,
      "clip_ratio/high_mean": 0.0008866977495927131,
      "clip_ratio/low_mean": 0.0006239195990929147,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0015106173705135006,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2733.0,
      "completions/mean_length": 821.2689819335938,
      "completions/mean_terminated_length": 509.0085754394531,
      "completions/min_length": 88.0,
      "completions/min_terminated_length": 88.0,
      "epoch": 14.597084548104956,
      "grad_norm": 1.9152460098266602,
      "learning_rate": 1e-06,
      "loss": -0.0306,
      "num_tokens": 847793777.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.1448495090007782,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.462861567735672,
      "step": 1562
    },
    {
      "clip_ratio/high_max": 0.001565180609759409,
      "clip_ratio/high_mean": 0.0005695019563063397,
      "clip_ratio/low_mean": 0.0002960622764476284,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008655642213852843,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3497.0,
      "completions/mean_length": 882.911865234375,
      "completions/mean_terminated_length": 532.9714965820312,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 14.606413994169095,
      "grad_norm": 0.19960281252861023,
      "learning_rate": 1e-06,
      "loss": -0.0578,
      "num_tokens": 848309738.0,
      "reward": 0.6986607313156128,
      "reward_std": 0.12181811034679413,
      "rewards/verify_math_reward/mean": 0.6986607313156128,
      "rewards/verify_math_reward/std": 0.4590960443019867,
      "step": 1563
    },
    {
      "clip_ratio/high_max": 0.002330915940547129,
      "clip_ratio/high_mean": 0.0007907459512352943,
      "clip_ratio/low_mean": 0.00045763651905872393,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012483825157687534,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2865.0,
      "completions/mean_length": 1017.6417846679688,
      "completions/mean_terminated_length": 577.8762817382812,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 14.615743440233237,
      "grad_norm": 0.40501147508621216,
      "learning_rate": 1e-06,
      "loss": -0.0476,
      "num_tokens": 848852249.0,
      "reward": 0.652901828289032,
      "reward_std": 0.13057473301887512,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631320357322693,
      "step": 1564
    },
    {
      "clip_ratio/high_max": 0.001787970308214426,
      "clip_ratio/high_mean": 0.0006398654822987737,
      "clip_ratio/low_mean": 0.0003360553191669169,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009759207823663019,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3624.0,
      "completions/mean_length": 922.4810791015625,
      "completions/mean_terminated_length": 505.7563171386719,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 14.625072886297376,
      "grad_norm": 0.26958921551704407,
      "learning_rate": 1e-06,
      "loss": -0.0441,
      "num_tokens": 849341368.0,
      "reward": 0.684151828289032,
      "reward_std": 0.11881474405527115,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 1565
    },
    {
      "clip_ratio/high_max": 0.0016191406866710167,
      "clip_ratio/high_mean": 0.0005101120368635748,
      "clip_ratio/low_mean": 0.00048234225255328056,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000992454315564828,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2959.0,
      "completions/mean_length": 900.8560791015625,
      "completions/mean_terminated_length": 512.9599609375,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 14.634402332361516,
      "grad_norm": 0.2411060333251953,
      "learning_rate": 1e-06,
      "loss": -0.035,
      "num_tokens": 849836071.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.11411890387535095,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 1566
    },
    {
      "clip_ratio/high_max": 0.003277888612501556,
      "clip_ratio/high_mean": 0.0009978387661249144,
      "clip_ratio/low_mean": 0.0007292624004548998,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0017271011383854784,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1854.0,
      "completions/mean_length": 798.4933471679688,
      "completions/mean_terminated_length": 501.637451171875,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 14.643731778425655,
      "grad_norm": 0.955542266368866,
      "learning_rate": 1e-06,
      "loss": -0.0263,
      "num_tokens": 850344489.0,
      "reward": 0.7131696939468384,
      "reward_std": 0.1295209378004074,
      "rewards/verify_math_reward/mean": 0.7131696343421936,
      "rewards/verify_math_reward/std": 0.4525342583656311,
      "step": 1567
    },
    {
      "clip_ratio/high_max": 0.001533329370431602,
      "clip_ratio/high_mean": 0.0005405441042967141,
      "clip_ratio/low_mean": 0.00026572002070679446,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008062641172728036,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3637.0,
      "completions/mean_length": 926.1060791015625,
      "completions/mean_terminated_length": 550.1510620117188,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 14.653061224489797,
      "grad_norm": 0.22720454633235931,
      "learning_rate": 1e-06,
      "loss": -0.0323,
      "num_tokens": 850867768.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.10772737860679626,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422141790390015,
      "step": 1568
    },
    {
      "clip_ratio/high_max": 0.001610024191904813,
      "clip_ratio/high_mean": 0.0005536392818612512,
      "clip_ratio/low_mean": 0.0003219873933630879,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008756266615819186,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4084.0,
      "completions/mean_length": 1040.7623291015625,
      "completions/mean_terminated_length": 577.3714599609375,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 14.662390670553936,
      "grad_norm": 0.26372191309928894,
      "learning_rate": 1e-06,
      "loss": -0.0812,
      "num_tokens": 851417939.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.1159176379442215,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 1569
    },
    {
      "clip_ratio/high_max": 0.001548176569485804,
      "clip_ratio/high_mean": 0.0005403070363172446,
      "clip_ratio/low_mean": 0.00031034092103254807,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008506479543939349,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3289.0,
      "completions/mean_length": 969.5904541015625,
      "completions/mean_terminated_length": 522.96044921875,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 14.671720116618076,
      "grad_norm": 0.5310737490653992,
      "learning_rate": 1e-06,
      "loss": -0.0419,
      "num_tokens": 851915020.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.09281206876039505,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219157218933105,
      "step": 1570
    },
    {
      "clip_ratio/high_max": 0.0023266314528882504,
      "clip_ratio/high_mean": 0.001036266794471885,
      "clip_ratio/low_mean": 0.0004133927404836868,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0014496595322270878,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3749.0,
      "completions/mean_length": 917.7969360351562,
      "completions/mean_terminated_length": 540.8564453125,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 14.681049562682215,
      "grad_norm": 0.41868698596954346,
      "learning_rate": 1e-06,
      "loss": -0.0491,
      "num_tokens": 852440534.0,
      "reward": 0.660714328289032,
      "reward_std": 0.16889871656894684,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 1571
    },
    {
      "clip_ratio/high_max": 0.0022973095401539467,
      "clip_ratio/high_mean": 0.0007713541972407256,
      "clip_ratio/low_mean": 0.0002951213218693738,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010664755009202054,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3171.0,
      "completions/mean_length": 1020.1428833007812,
      "completions/mean_terminated_length": 535.31787109375,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 14.690379008746355,
      "grad_norm": 0.4241335988044739,
      "learning_rate": 1e-06,
      "loss": -0.0658,
      "num_tokens": 852952366.0,
      "reward": 0.637276828289032,
      "reward_std": 0.11937998235225677,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 1572
    },
    {
      "clip_ratio/high_max": 0.0019253582649980672,
      "clip_ratio/high_mean": 0.0007438572683895472,
      "clip_ratio/low_mean": 0.0004454248191905208,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011892820912180468,
      "completions/clipped_ratio": 0.1808035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2997.0,
      "completions/mean_length": 1219.53125,
      "completions/mean_terminated_length": 584.6702880859375,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 14.699708454810496,
      "grad_norm": 0.2726913392543793,
      "learning_rate": 1e-06,
      "loss": -0.0803,
      "num_tokens": 853472754.0,
      "reward": 0.535714328289032,
      "reward_std": 0.15312324464321136,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 1573
    },
    {
      "clip_ratio/high_max": 0.001717471230222145,
      "clip_ratio/high_mean": 0.0005929701642344298,
      "clip_ratio/low_mean": 0.0002548115628542291,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008477817191305803,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2517.0,
      "completions/mean_length": 876.1663208007812,
      "completions/mean_terminated_length": 498.7793273925781,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 14.709037900874636,
      "grad_norm": 0.2161475121974945,
      "learning_rate": 1e-06,
      "loss": -0.0499,
      "num_tokens": 853960511.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.10092676430940628,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 1574
    },
    {
      "clip_ratio/high_max": 0.0019312379954499193,
      "clip_ratio/high_mean": 0.0008385988039663061,
      "clip_ratio/low_mean": 0.0004536276237558923,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012922264049848309,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2167.0,
      "completions/mean_length": 878.0881958007812,
      "completions/mean_terminated_length": 518.7680053710938,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 14.718367346938775,
      "grad_norm": 0.28904110193252563,
      "learning_rate": 1e-06,
      "loss": -0.045,
      "num_tokens": 854460214.0,
      "reward": 0.684151828289032,
      "reward_std": 0.16522133350372314,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 1575
    },
    {
      "clip_ratio/high_max": 0.002119021868566051,
      "clip_ratio/high_mean": 0.0007490015395887895,
      "clip_ratio/low_mean": 0.00047770162291271845,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001226703174324939,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3699.0,
      "completions/mean_length": 959.755615234375,
      "completions/mean_terminated_length": 565.7550048828125,
      "completions/min_length": 79.0,
      "completions/min_terminated_length": 79.0,
      "epoch": 14.727696793002915,
      "grad_norm": 0.5477799773216248,
      "learning_rate": 1e-06,
      "loss": -0.0265,
      "num_tokens": 855017243.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.13463158905506134,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 1576
    },
    {
      "clip_ratio/high_max": 0.0015941970887070056,
      "clip_ratio/high_mean": 0.0005644643433697638,
      "clip_ratio/low_mean": 0.00034719701125141,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009116613437072374,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3709.0,
      "completions/mean_length": 939.9699096679688,
      "completions/mean_terminated_length": 511.9657897949219,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 14.737026239067056,
      "grad_norm": 0.2434639036655426,
      "learning_rate": 1e-06,
      "loss": -0.1015,
      "num_tokens": 855520512.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.11666832119226456,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 1577
    },
    {
      "clip_ratio/high_max": 0.0019176306414010469,
      "clip_ratio/high_mean": 0.0006858818605905981,
      "clip_ratio/low_mean": 0.00029949311669952294,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009853749816102209,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2435.0,
      "completions/mean_length": 805.6864013671875,
      "completions/mean_terminated_length": 509.47808837890625,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 14.746355685131196,
      "grad_norm": 0.2507024109363556,
      "learning_rate": 1e-06,
      "loss": -0.0487,
      "num_tokens": 856019855.0,
      "reward": 0.7276785969734192,
      "reward_std": 0.14391450583934784,
      "rewards/verify_math_reward/mean": 0.7276785969734192,
      "rewards/verify_math_reward/std": 0.4454030692577362,
      "step": 1578
    },
    {
      "clip_ratio/high_max": 0.001521907644928433,
      "clip_ratio/high_mean": 0.0005420800125648384,
      "clip_ratio/low_mean": 0.0003341919791637338,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000876272022651392,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1684.0,
      "completions/mean_length": 843.622802734375,
      "completions/mean_terminated_length": 507.1699523925781,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 14.755685131195335,
      "grad_norm": 0.23318514227867126,
      "learning_rate": 1e-06,
      "loss": -0.0467,
      "num_tokens": 856526765.0,
      "reward": 0.699776828289032,
      "reward_std": 0.11814073473215103,
      "rewards/verify_math_reward/mean": 0.6997767686843872,
      "rewards/verify_math_reward/std": 0.4586109220981598,
      "step": 1579
    },
    {
      "clip_ratio/high_max": 0.0023186022008303553,
      "clip_ratio/high_mean": 0.0009736781994433841,
      "clip_ratio/low_mean": 0.0004227125818943023,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001396390800437075,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3698.0,
      "completions/mean_length": 962.54248046875,
      "completions/mean_terminated_length": 528.5565185546875,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 14.765014577259475,
      "grad_norm": 0.3683277368545532,
      "learning_rate": 1e-06,
      "loss": -0.0837,
      "num_tokens": 857033531.0,
      "reward": 0.7020089626312256,
      "reward_std": 0.1603735387325287,
      "rewards/verify_math_reward/mean": 0.7020089030265808,
      "rewards/verify_math_reward/std": 0.45763099193573,
      "step": 1580
    },
    {
      "clip_ratio/high_max": 0.0021947820932837203,
      "clip_ratio/high_mean": 0.0007659841521672206,
      "clip_ratio/low_mean": 0.00047243521657946985,
      "clip_ratio/low_min": 2.3088288799044676e-05,
      "clip_ratio/region_mean": 0.001238419357832754,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3981.0,
      "completions/mean_length": 998.21435546875,
      "completions/mean_terminated_length": 587.0037841796875,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 14.774344023323614,
      "grad_norm": 0.26309913396835327,
      "learning_rate": 1e-06,
      "loss": -0.0339,
      "num_tokens": 857599387.0,
      "reward": 0.59375,
      "reward_std": 0.14484810829162598,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 1581
    },
    {
      "clip_ratio/high_max": 0.0015159290342126042,
      "clip_ratio/high_mean": 0.0004449128691703663,
      "clip_ratio/low_mean": 0.00022011106489117083,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006650239320151741,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2786.0,
      "completions/mean_length": 924.404052734375,
      "completions/mean_terminated_length": 534.9097900390625,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 14.783673469387756,
      "grad_norm": 0.22830374538898468,
      "learning_rate": 1e-06,
      "loss": -0.0226,
      "num_tokens": 858118613.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.10585013777017593,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791128396987915,
      "step": 1582
    },
    {
      "clip_ratio/high_max": 0.0013959056595922448,
      "clip_ratio/high_mean": 0.0004791517267221934,
      "clip_ratio/low_mean": 0.00022531271088155336,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007044644298730418,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1790.0,
      "completions/mean_length": 937.1038208007812,
      "completions/mean_terminated_length": 499.5946350097656,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 14.793002915451895,
      "grad_norm": 0.26100870966911316,
      "learning_rate": 1e-06,
      "loss": -0.036,
      "num_tokens": 858596634.0,
      "reward": 0.7198660969734192,
      "reward_std": 0.09923359751701355,
      "rewards/verify_math_reward/mean": 0.7198660969734192,
      "rewards/verify_math_reward/std": 0.44931527972221375,
      "step": 1583
    },
    {
      "clip_ratio/high_max": 0.002612857635540422,
      "clip_ratio/high_mean": 0.0008548300629627192,
      "clip_ratio/low_mean": 0.00039178794963845576,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0012466180451156106,
      "completions/clipped_ratio": 0.1517857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3931.0,
      "completions/mean_length": 1072.2757568359375,
      "completions/mean_terminated_length": 531.1881713867188,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 14.802332361516035,
      "grad_norm": 0.30906954407691956,
      "learning_rate": 1e-06,
      "loss": -0.086,
      "num_tokens": 859086321.0,
      "reward": 0.621651828289032,
      "reward_std": 0.14759187400341034,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.485245943069458,
      "step": 1584
    },
    {
      "clip_ratio/high_max": 0.002269463788252324,
      "clip_ratio/high_mean": 0.0009261393788619898,
      "clip_ratio/low_mean": 0.0004527127066467074,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013788520627713297,
      "completions/clipped_ratio": 0.1707589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2527.0,
      "completions/mean_length": 1170.0770263671875,
      "completions/mean_terminated_length": 567.5652465820312,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 14.811661807580174,
      "grad_norm": 0.2833002209663391,
      "learning_rate": 1e-06,
      "loss": -0.1193,
      "num_tokens": 859604014.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.16645805537700653,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 1585
    },
    {
      "clip_ratio/high_max": 0.001749017865222413,
      "clip_ratio/high_mean": 0.0006315624268609099,
      "clip_ratio/low_mean": 0.00038260512474153074,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001014167552057188,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3774.0,
      "completions/mean_length": 987.0491333007812,
      "completions/mean_terminated_length": 542.9132690429688,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 14.820991253644316,
      "grad_norm": 0.7678061723709106,
      "learning_rate": 1e-06,
      "loss": -0.0335,
      "num_tokens": 860122314.0,
      "reward": 0.684151828289032,
      "reward_std": 0.12050653249025345,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 1586
    },
    {
      "clip_ratio/high_max": 0.0014779198972973973,
      "clip_ratio/high_mean": 0.0005677122862834949,
      "clip_ratio/low_mean": 0.00037268871847118135,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00094040101612336,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3414.0,
      "completions/mean_length": 1022.0201416015625,
      "completions/mean_terminated_length": 587.3554077148438,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 14.830320699708455,
      "grad_norm": 0.3296527862548828,
      "learning_rate": 1e-06,
      "loss": -0.0528,
      "num_tokens": 860668564.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.15135657787322998,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 1587
    },
    {
      "clip_ratio/high_max": 0.002380060795985628,
      "clip_ratio/high_mean": 0.0007494002784369513,
      "clip_ratio/low_mean": 0.0005925272116655833,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0013419274910120293,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3152.0,
      "completions/mean_length": 887.763427734375,
      "completions/mean_terminated_length": 507.26092529296875,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 14.839650145772595,
      "grad_norm": 0.7693892121315002,
      "learning_rate": 1e-06,
      "loss": -0.045,
      "num_tokens": 861167424.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.13398967683315277,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613667368888855,
      "step": 1588
    },
    {
      "clip_ratio/high_max": 0.0017301673724432476,
      "clip_ratio/high_mean": 0.0006455211268985295,
      "clip_ratio/low_mean": 0.00035498673514666734,
      "clip_ratio/low_min": 1.2913223145005759e-05,
      "clip_ratio/region_mean": 0.001000507858407218,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3771.0,
      "completions/mean_length": 1087.9129638671875,
      "completions/mean_terminated_length": 591.1287231445312,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 14.848979591836734,
      "grad_norm": 0.26049551367759705,
      "learning_rate": 1e-06,
      "loss": -0.0553,
      "num_tokens": 861710810.0,
      "reward": 0.590401828289032,
      "reward_std": 0.13940368592739105,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 1589
    },
    {
      "clip_ratio/high_max": 0.0015018580361356726,
      "clip_ratio/high_mean": 0.00042192500177407055,
      "clip_ratio/low_mean": 0.0002517023790460371,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006736273935530335,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2102.0,
      "completions/mean_length": 1017.771240234375,
      "completions/mean_terminated_length": 546.3307495117188,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 14.858309037900874,
      "grad_norm": 0.19188618659973145,
      "learning_rate": 1e-06,
      "loss": -0.0583,
      "num_tokens": 862228093.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.1025739535689354,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 1590
    },
    {
      "clip_ratio/high_max": 0.0018410165939712897,
      "clip_ratio/high_mean": 0.0006286225598159945,
      "clip_ratio/low_mean": 0.0005019946329412051,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0011306172164040618,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2177.0,
      "completions/mean_length": 949.9855346679688,
      "completions/mean_terminated_length": 505.1350402832031,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 14.867638483965015,
      "grad_norm": 0.22820855677127838,
      "learning_rate": 1e-06,
      "loss": -0.0593,
      "num_tokens": 862712840.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.1474849283695221,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 1591
    },
    {
      "clip_ratio/high_max": 0.002091548369207885,
      "clip_ratio/high_mean": 0.0008278338173113298,
      "clip_ratio/low_mean": 0.0003703655561366759,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001198199337522965,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3829.0,
      "completions/mean_length": 830.4263916015625,
      "completions/mean_terminated_length": 492.6083679199219,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 14.876967930029155,
      "grad_norm": 0.3046199083328247,
      "learning_rate": 1e-06,
      "loss": -0.068,
      "num_tokens": 863200670.0,
      "reward": 0.7366071939468384,
      "reward_std": 0.16104431450366974,
      "rewards/verify_math_reward/mean": 0.7366071343421936,
      "rewards/verify_math_reward/std": 0.44071969389915466,
      "step": 1592
    },
    {
      "clip_ratio/high_max": 0.0031508898828178644,
      "clip_ratio/high_mean": 0.0009709434707474429,
      "clip_ratio/low_mean": 0.00040443729994876776,
      "clip_ratio/low_min": 1.72986437974032e-05,
      "clip_ratio/region_mean": 0.0013753807579632849,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3477.0,
      "completions/mean_length": 1046.149658203125,
      "completions/mean_terminated_length": 542.4681396484375,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 14.886297376093294,
      "grad_norm": 0.655681312084198,
      "learning_rate": 1e-06,
      "loss": -0.0493,
      "num_tokens": 863714060.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.15871354937553406,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 1593
    },
    {
      "clip_ratio/high_max": 0.0019156091511831619,
      "clip_ratio/high_mean": 0.00046137945810187375,
      "clip_ratio/low_mean": 0.00014914218763806275,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006105216471041786,
      "completions/clipped_ratio": 0.1618303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2171.0,
      "completions/mean_length": 1073.6395263671875,
      "completions/mean_terminated_length": 490.0945129394531,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 14.895626822157434,
      "grad_norm": 0.23443014919757843,
      "learning_rate": 1e-06,
      "loss": -0.0352,
      "num_tokens": 864174401.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.06891624629497528,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 1594
    },
    {
      "clip_ratio/high_max": 0.0020316538721090183,
      "clip_ratio/high_mean": 0.0007326751583605073,
      "clip_ratio/low_mean": 0.0002692601053695398,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010019352775998414,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3215.0,
      "completions/mean_length": 1024.97216796875,
      "completions/mean_terminated_length": 545.4954833984375,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 14.904956268221575,
      "grad_norm": 0.319247305393219,
      "learning_rate": 1e-06,
      "loss": -0.0554,
      "num_tokens": 864687792.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.12433795630931854,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179922461509705,
      "step": 1595
    },
    {
      "clip_ratio/high_max": 0.0016947853328019846,
      "clip_ratio/high_mean": 0.0006471200695159496,
      "clip_ratio/low_mean": 0.00041615926420490723,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010632793600962032,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3002.0,
      "completions/mean_length": 967.8850708007812,
      "completions/mean_terminated_length": 579.32373046875,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 14.914285714285715,
      "grad_norm": 0.3197633624076843,
      "learning_rate": 1e-06,
      "loss": -0.0531,
      "num_tokens": 865235209.0,
      "reward": 0.625,
      "reward_std": 0.15398269891738892,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1596
    },
    {
      "clip_ratio/high_max": 0.00191968068247661,
      "clip_ratio/high_mean": 0.0006238934201974189,
      "clip_ratio/low_mean": 0.00027419712159826304,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008980905367934611,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2439.0,
      "completions/mean_length": 859.529052734375,
      "completions/mean_terminated_length": 498.136474609375,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 14.923615160349854,
      "grad_norm": 0.23354937136173248,
      "learning_rate": 1e-06,
      "loss": -0.0534,
      "num_tokens": 865718387.0,
      "reward": 0.7120535969734192,
      "reward_std": 0.11114512383937836,
      "rewards/verify_math_reward/mean": 0.7120535969734192,
      "rewards/verify_math_reward/std": 0.4530589282512665,
      "step": 1597
    },
    {
      "clip_ratio/high_max": 0.0017583142580406275,
      "clip_ratio/high_mean": 0.0007182293265941553,
      "clip_ratio/low_mean": 0.0003164928714340931,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010347222105338005,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2242.0,
      "completions/mean_length": 1035.08154296875,
      "completions/mean_terminated_length": 566.2921752929688,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 14.932944606413994,
      "grad_norm": 0.2821706533432007,
      "learning_rate": 1e-06,
      "loss": -0.061,
      "num_tokens": 866242380.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.12148292362689972,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1598
    },
    {
      "clip_ratio/high_max": 0.0017521736481285188,
      "clip_ratio/high_mean": 0.0005596852179223788,
      "clip_ratio/low_mean": 0.00033856697450573847,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00089825220311468,
      "completions/clipped_ratio": 0.1618303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3748.0,
      "completions/mean_length": 1095.7020263671875,
      "completions/mean_terminated_length": 516.416748046875,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 14.942274052478133,
      "grad_norm": 1.2675962448120117,
      "learning_rate": 1e-06,
      "loss": -0.0356,
      "num_tokens": 866722681.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.11592016369104385,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 1599
    },
    {
      "clip_ratio/high_max": 0.0015007805759523762,
      "clip_ratio/high_mean": 0.0005371225406634039,
      "clip_ratio/low_mean": 0.00045821217690900085,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009953347180271521,
      "completions/clipped_ratio": 0.1506696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3126.0,
      "completions/mean_length": 1096.5570068359375,
      "completions/mean_terminated_length": 564.4612426757812,
      "completions/min_length": 180.0,
      "completions/min_terminated_length": 180.0,
      "epoch": 14.951603498542275,
      "grad_norm": 0.36327141523361206,
      "learning_rate": 1e-06,
      "loss": -0.0548,
      "num_tokens": 867252636.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.13583439588546753,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 1600
    },
    {
      "epoch": 14.951603498542275,
      "step": 1600,
      "total_flos": 0.0,
      "train_loss": 21.70611201519991,
      "train_runtime": 127568.9117,
      "train_samples_per_second": 11.238,
      "train_steps_per_second": 0.013
    }
  ],
  "logging_steps": 1,
  "max_steps": 1600,
  "num_input_tokens_seen": 867252636,
  "num_train_epochs": 15,
  "save_steps": 160,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}