{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 14.951603498542275,
  "eval_steps": 500,
  "global_step": 1600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2283.0,
      "completions/mean_length": 607.34375,
      "completions/mean_terminated_length": 535.8223266601562,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "epoch": 0.009329446064139942,
      "grad_norm": 0.18171170353889465,
      "learning_rate": 1e-06,
      "loss": -0.0102,
      "num_tokens": 556956.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.27554163336753845,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 1
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3530.0,
      "completions/mean_length": 591.0435791015625,
      "completions/mean_terminated_length": 539.441650390625,
      "completions/min_length": 35.0,
      "completions/min_terminated_length": 35.0,
      "epoch": 0.018658892128279883,
      "grad_norm": 0.14002105593681335,
      "learning_rate": 1e-06,
      "loss": 0.0057,
      "num_tokens": 1120539.0,
      "reward": 0.4587053656578064,
      "reward_std": 0.23826707899570465,
      "rewards/verify_math_reward/mean": 0.4587053656578064,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 2
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2046.0,
      "completions/mean_length": 570.7991333007812,
      "completions/mean_terminated_length": 531.0112915039062,
      "completions/min_length": 67.0,
      "completions/min_terminated_length": 67.0,
      "epoch": 0.027988338192419825,
      "grad_norm": 0.14252738654613495,
      "learning_rate": 1e-06,
      "loss": 0.004,
      "num_tokens": 1689559.0,
      "reward": 0.504464328289032,
      "reward_std": 0.23642486333847046,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5002593398094177,
      "step": 3
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1987.0,
      "completions/mean_length": 552.765625,
      "completions/mean_terminated_length": 520.8446044921875,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 0.037317784256559766,
      "grad_norm": 0.15027225017547607,
      "learning_rate": 1e-06,
      "loss": 0.0021,
      "num_tokens": 2231501.0,
      "reward": 0.5457589626312256,
      "reward_std": 0.2344599962234497,
      "rewards/verify_math_reward/mean": 0.5457589030265808,
      "rewards/verify_math_reward/std": 0.4981797933578491,
      "step": 4
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3696.0,
      "completions/mean_length": 605.4364013671875,
      "completions/mean_terminated_length": 550.0306396484375,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "epoch": 0.04664723032069971,
      "grad_norm": 0.16336442530155182,
      "learning_rate": 1e-06,
      "loss": 0.0162,
      "num_tokens": 2800196.0,
      "reward": 0.4977678656578064,
      "reward_std": 0.25547540187835693,
      "rewards/verify_math_reward/mean": 0.4977678656578064,
      "rewards/verify_math_reward/std": 0.5002743005752563,
      "step": 5
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3311.0,
      "completions/mean_length": 582.796875,
      "completions/mean_terminated_length": 551.1464233398438,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 0.05597667638483965,
      "grad_norm": 0.14060573279857635,
      "learning_rate": 1e-06,
      "loss": 0.0086,
      "num_tokens": 3380038.0,
      "reward": 0.5145089626312256,
      "reward_std": 0.245405375957489,
      "rewards/verify_math_reward/mean": 0.5145089030265808,
      "rewards/verify_math_reward/std": 0.5000685453414917,
      "step": 6
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3680.0,
      "completions/mean_length": 562.1138916015625,
      "completions/mean_terminated_length": 526.2570190429688,
      "completions/min_length": 6.0,
      "completions/min_terminated_length": 6.0,
      "epoch": 0.0653061224489796,
      "grad_norm": 0.15395274758338928,
      "learning_rate": 1e-06,
      "loss": 0.0007,
      "num_tokens": 3945284.0,
      "reward": 0.535714328289032,
      "reward_std": 0.23634566366672516,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 7
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3486.0,
      "completions/mean_length": 633.5502319335938,
      "completions/mean_terminated_length": 578.5906982421875,
      "completions/min_length": 73.0,
      "completions/min_terminated_length": 73.0,
      "epoch": 0.07463556851311953,
      "grad_norm": 0.12867768108844757,
      "learning_rate": 1e-06,
      "loss": 0.0068,
      "num_tokens": 4529993.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.23856060206890106,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 8
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3523.0,
      "completions/mean_length": 651.9174194335938,
      "completions/mean_terminated_length": 573.2853393554688,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 0.08396501457725948,
      "grad_norm": 0.12435610592365265,
      "learning_rate": 1e-06,
      "loss": 0.0116,
      "num_tokens": 5121167.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.2126762568950653,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943605065345764,
      "step": 9
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2807.0,
      "completions/mean_length": 589.6484375,
      "completions/mean_terminated_length": 550.0733642578125,
      "completions/min_length": 93.0,
      "completions/min_terminated_length": 93.0,
      "epoch": 0.09329446064139942,
      "grad_norm": 0.13147082924842834,
      "learning_rate": 1e-06,
      "loss": -0.0089,
      "num_tokens": 5703772.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.21905823051929474,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 10
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2577.0,
      "completions/mean_length": 601.9185791015625,
      "completions/mean_terminated_length": 574.4061279296875,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 0.10262390670553936,
      "grad_norm": 0.11295190453529358,
      "learning_rate": 1e-06,
      "loss": 0.0141,
      "num_tokens": 6298763.0,
      "reward": 0.4899553656578064,
      "reward_std": 0.1918206363916397,
      "rewards/verify_math_reward/mean": 0.4899553656578064,
      "rewards/verify_math_reward/std": 0.5001782774925232,
      "step": 11
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3135.0,
      "completions/mean_length": 600.5826416015625,
      "completions/mean_terminated_length": 528.9225463867188,
      "completions/min_length": 50.0,
      "completions/min_terminated_length": 50.0,
      "epoch": 0.1119533527696793,
      "grad_norm": 0.12936817109584808,
      "learning_rate": 1e-06,
      "loss": 0.0068,
      "num_tokens": 6865637.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.20336057245731354,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 12
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2718.0,
      "completions/mean_length": 603.872802734375,
      "completions/mean_terminated_length": 576.375732421875,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 0.12128279883381925,
      "grad_norm": 0.11916936188936234,
      "learning_rate": 1e-06,
      "loss": 0.014,
      "num_tokens": 7466027.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.21582452952861786,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 13
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2803.0,
      "completions/mean_length": 567.6171875,
      "completions/mean_terminated_length": 527.79345703125,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 0.1306122448979592,
      "grad_norm": 0.12818068265914917,
      "learning_rate": 1e-06,
      "loss": 0.0052,
      "num_tokens": 8029764.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.22064122557640076,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 14
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3235.0,
      "completions/mean_length": 637.6975708007812,
      "completions/mean_terminated_length": 570.8134155273438,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 0.13994169096209913,
      "grad_norm": 0.1150139793753624,
      "learning_rate": 1e-06,
      "loss": -0.0025,
      "num_tokens": 8624973.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.1802103966474533,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 15
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3444.0,
      "completions/mean_length": 630.1038208007812,
      "completions/mean_terminated_length": 563.0728149414062,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 0.14927113702623906,
      "grad_norm": 0.12789832055568695,
      "learning_rate": 1e-06,
      "loss": 0.0077,
      "num_tokens": 9211378.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.21835143864154816,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763562679291,
      "step": 16
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3065.0,
      "completions/mean_length": 569.8939819335938,
      "completions/mean_terminated_length": 550.1066284179688,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 0.158600583090379,
      "grad_norm": 0.13087671995162964,
      "learning_rate": 1e-06,
      "loss": 0.0131,
      "num_tokens": 9787147.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.2232327163219452,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 17
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3330.0,
      "completions/mean_length": 656.6607666015625,
      "completions/mean_terminated_length": 570.0869140625,
      "completions/min_length": 82.0,
      "completions/min_terminated_length": 82.0,
      "epoch": 0.16793002915451896,
      "grad_norm": 0.12154269218444824,
      "learning_rate": 1e-06,
      "loss": 0.0173,
      "num_tokens": 10366635.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.22320063412189484,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 18
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3149.0,
      "completions/mean_length": 601.3092041015625,
      "completions/mean_terminated_length": 529.6640014648438,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "epoch": 0.1772594752186589,
      "grad_norm": 0.13360120356082916,
      "learning_rate": 1e-06,
      "loss": -0.0023,
      "num_tokens": 10926136.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.25400978326797485,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 19
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3689.0,
      "completions/mean_length": 558.7589721679688,
      "completions/mean_terminated_length": 538.9091186523438,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 0.18658892128279883,
      "grad_norm": 0.12295672297477722,
      "learning_rate": 1e-06,
      "loss": 0.007,
      "num_tokens": 11497472.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.21098628640174866,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 20
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3967.0,
      "completions/mean_length": 580.6529541015625,
      "completions/mean_terminated_length": 528.8980712890625,
      "completions/min_length": 71.0,
      "completions/min_terminated_length": 71.0,
      "epoch": 0.19591836734693877,
      "grad_norm": 0.13105490803718567,
      "learning_rate": 1e-06,
      "loss": 0.0106,
      "num_tokens": 12046681.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.20046527683734894,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935930073261261,
      "step": 21
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3375.0,
      "completions/mean_length": 636.5267944335938,
      "completions/mean_terminated_length": 593.5277099609375,
      "completions/min_length": 46.0,
      "completions/min_terminated_length": 46.0,
      "epoch": 0.20524781341107873,
      "grad_norm": 0.12085764110088348,
      "learning_rate": 1e-06,
      "loss": 0.0143,
      "num_tokens": 12667385.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.220902681350708,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756613254547,
      "step": 22
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3876.0,
      "completions/mean_length": 608.3582763671875,
      "completions/mean_terminated_length": 552.9989013671875,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 0.21457725947521866,
      "grad_norm": 0.1213921383023262,
      "learning_rate": 1e-06,
      "loss": -0.0045,
      "num_tokens": 13239466.0,
      "reward": 0.6328125,
      "reward_std": 0.18021151423454285,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 23
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2871.0,
      "completions/mean_length": 634.1551513671875,
      "completions/mean_terminated_length": 579.2052001953125,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 0.2239067055393586,
      "grad_norm": 0.11899662017822266,
      "learning_rate": 1e-06,
      "loss": 0.0154,
      "num_tokens": 13834621.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.2045544981956482,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943602085113525,
      "step": 24
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2828.0,
      "completions/mean_length": 634.7824096679688,
      "completions/mean_terminated_length": 587.7975463867188,
      "completions/min_length": 86.0,
      "completions/min_terminated_length": 86.0,
      "epoch": 0.23323615160349853,
      "grad_norm": 0.11786917597055435,
      "learning_rate": 1e-06,
      "loss": 0.0117,
      "num_tokens": 14439466.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.19234946370124817,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763562679291,
      "step": 25
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3850.0,
      "completions/mean_length": 616.411865234375,
      "completions/mean_terminated_length": 569.1776123046875,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 0.2425655976676385,
      "grad_norm": 0.12758877873420715,
      "learning_rate": 1e-06,
      "loss": 0.0069,
      "num_tokens": 15030091.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.1912982016801834,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 26
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1912.0,
      "completions/mean_length": 587.1652221679688,
      "completions/mean_terminated_length": 543.5525512695312,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 0.2518950437317784,
      "grad_norm": 0.13884863257408142,
      "learning_rate": 1e-06,
      "loss": 0.0073,
      "num_tokens": 15599255.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.22218075394630432,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 27
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3205.0,
      "completions/mean_length": 576.9386596679688,
      "completions/mean_terminated_length": 545.2353515625,
      "completions/min_length": 49.0,
      "completions/min_terminated_length": 49.0,
      "epoch": 0.2612244897959184,
      "grad_norm": 0.13604635000228882,
      "learning_rate": 1e-06,
      "loss": 0.0071,
      "num_tokens": 16173416.0,
      "reward": 0.5379464626312256,
      "reward_std": 0.2260870635509491,
      "rewards/verify_math_reward/mean": 0.5379464030265808,
      "rewards/verify_math_reward/std": 0.4988364577293396,
      "step": 28
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3413.0,
      "completions/mean_length": 643.078125,
      "completions/mean_terminated_length": 564.2442626953125,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 0.2705539358600583,
      "grad_norm": 0.12634027004241943,
      "learning_rate": 1e-06,
      "loss": 0.0017,
      "num_tokens": 16756678.0,
      "reward": 0.4910714626312256,
      "reward_std": 0.19989962875843048,
      "rewards/verify_math_reward/mean": 0.4910714328289032,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 29
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3281.0,
      "completions/mean_length": 587.6339721679688,
      "completions/mean_terminated_length": 548.0361328125,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 0.27988338192419826,
      "grad_norm": 0.12438934296369553,
      "learning_rate": 1e-06,
      "loss": 0.025,
      "num_tokens": 17337566.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.22138941287994385,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 30
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2446.0,
      "completions/mean_length": 660.9810791015625,
      "completions/mean_terminated_length": 614.351806640625,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 0.2892128279883382,
      "grad_norm": 0.11914543807506561,
      "learning_rate": 1e-06,
      "loss": 0.0042,
      "num_tokens": 17965533.0,
      "reward": 0.5457589626312256,
      "reward_std": 0.21530599892139435,
      "rewards/verify_math_reward/mean": 0.5457589030265808,
      "rewards/verify_math_reward/std": 0.4981797933578491,
      "step": 31
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3930.0,
      "completions/mean_length": 568.6819458007812,
      "completions/mean_terminated_length": 544.9022827148438,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 0.29854227405247813,
      "grad_norm": 0.13532161712646484,
      "learning_rate": 1e-06,
      "loss": 0.0072,
      "num_tokens": 18541336.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.2328890562057495,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 32
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2846.0,
      "completions/mean_length": 557.8928833007812,
      "completions/mean_terminated_length": 517.9593505859375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 0.30787172011661806,
      "grad_norm": 0.1307281255722046,
      "learning_rate": 1e-06,
      "loss": 0.0152,
      "num_tokens": 19086784.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.20485760271549225,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 33
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4079.0,
      "completions/mean_length": 610.513427734375,
      "completions/mean_terminated_length": 571.173828125,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 0.317201166180758,
      "grad_norm": 0.13105987012386322,
      "learning_rate": 1e-06,
      "loss": 0.0043,
      "num_tokens": 19676236.0,
      "reward": 0.5390625,
      "reward_std": 0.2295122593641281,
      "rewards/verify_math_reward/mean": 0.5390625,
      "rewards/verify_math_reward/std": 0.4987502098083496,
      "step": 34
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3472.0,
      "completions/mean_length": 640.3928833007812,
      "completions/mean_terminated_length": 573.5608520507812,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 0.32653061224489793,
      "grad_norm": 0.10946747660636902,
      "learning_rate": 1e-06,
      "loss": -0.0098,
      "num_tokens": 20264052.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.16145730018615723,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 35
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2592.0,
      "completions/mean_length": 659.5335083007812,
      "completions/mean_terminated_length": 593.0716552734375,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 0.3358600583090379,
      "grad_norm": 0.1137261837720871,
      "learning_rate": 1e-06,
      "loss": 0.0114,
      "num_tokens": 20878186.0,
      "reward": 0.5625,
      "reward_std": 0.20282670855522156,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 36
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3530.0,
      "completions/mean_length": 612.9152221679688,
      "completions/mean_terminated_length": 545.5517578125,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 0.34518950437317786,
      "grad_norm": 0.1323130577802658,
      "learning_rate": 1e-06,
      "loss": 0.0043,
      "num_tokens": 21434454.0,
      "reward": 0.559151828289032,
      "reward_std": 0.20256778597831726,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 37
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3922.0,
      "completions/mean_length": 625.1027221679688,
      "completions/mean_terminated_length": 566.0068359375,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 0.3545189504373178,
      "grad_norm": 0.12597277760505676,
      "learning_rate": 1e-06,
      "loss": 0.0033,
      "num_tokens": 22022442.0,
      "reward": 0.546875,
      "reward_std": 0.19915145635604858,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 38
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2314.0,
      "completions/mean_length": 588.6495971679688,
      "completions/mean_terminated_length": 545.0553588867188,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 0.3638483965014577,
      "grad_norm": 0.13477082550525665,
      "learning_rate": 1e-06,
      "loss": 0.003,
      "num_tokens": 22600680.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.21226690709590912,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317117214203,
      "step": 39
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2219.0,
      "completions/mean_length": 589.9631958007812,
      "completions/mean_terminated_length": 554.388916015625,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 0.37317784256559766,
      "grad_norm": 0.12254554778337479,
      "learning_rate": 1e-06,
      "loss": 0.0026,
      "num_tokens": 23194775.0,
      "reward": 0.520089328289032,
      "reward_std": 0.19366033375263214,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 40
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2182.0,
      "completions/mean_length": 680.0234375,
      "completions/mean_terminated_length": 590.0263671875,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 0.3825072886297376,
      "grad_norm": 0.1197601780295372,
      "learning_rate": 1e-06,
      "loss": 0.0007,
      "num_tokens": 23811652.0,
      "reward": 0.4977678656578064,
      "reward_std": 0.20985764265060425,
      "rewards/verify_math_reward/mean": 0.4977678656578064,
      "rewards/verify_math_reward/std": 0.5002742409706116,
      "step": 41
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3504.0,
      "completions/mean_length": 616.5502319335938,
      "completions/mean_terminated_length": 553.2874755859375,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 0.39183673469387753,
      "grad_norm": 0.11636195331811905,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 24389745.0,
      "reward": 0.512276828289032,
      "reward_std": 0.18678346276283264,
      "rewards/verify_math_reward/mean": 0.5122767686843872,
      "rewards/verify_math_reward/std": 0.500128448009491,
      "step": 42
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2795.0,
      "completions/mean_length": 610.9486694335938,
      "completions/mean_terminated_length": 555.63037109375,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 0.40116618075801747,
      "grad_norm": 0.1344902515411377,
      "learning_rate": 1e-06,
      "loss": 0.0045,
      "num_tokens": 24965947.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.2333090901374817,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 43
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2116.0,
      "completions/mean_length": 544.6819458007812,
      "completions/mean_terminated_length": 508.6482238769531,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 0.41049562682215746,
      "grad_norm": 0.12748591601848602,
      "learning_rate": 1e-06,
      "loss": 0.0113,
      "num_tokens": 25503310.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.20628975331783295,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 44
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2274.0,
      "completions/mean_length": 618.0100708007812,
      "completions/mean_terminated_length": 570.7975463867188,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 0.4198250728862974,
      "grad_norm": 0.1190551146864891,
      "learning_rate": 1e-06,
      "loss": 0.0086,
      "num_tokens": 26101287.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.20565037429332733,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 45
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2312.0,
      "completions/mean_length": 644.5792846679688,
      "completions/mean_terminated_length": 609.5591430664062,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 0.4291545189504373,
      "grad_norm": 0.11842440068721771,
      "learning_rate": 1e-06,
      "loss": -0.0001,
      "num_tokens": 26730414.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.23724929988384247,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796950817108154,
      "step": 46
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3814.0,
      "completions/mean_length": 678.6328125,
      "completions/mean_terminated_length": 592.6121215820312,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 0.43848396501457726,
      "grad_norm": 0.12601913511753082,
      "learning_rate": 1e-06,
      "loss": 0.0027,
      "num_tokens": 27333253.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.23067805171012878,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 47
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3334.0,
      "completions/mean_length": 610.5089721679688,
      "completions/mean_terminated_length": 543.0989379882812,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 0.4478134110787172,
      "grad_norm": 0.12337189167737961,
      "learning_rate": 1e-06,
      "loss": -0.0083,
      "num_tokens": 27905285.0,
      "reward": 0.543526828289032,
      "reward_std": 0.16296179592609406,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 48
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2660.0,
      "completions/mean_length": 612.9564819335938,
      "completions/mean_terminated_length": 561.67724609375,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 0.45714285714285713,
      "grad_norm": 0.11862468719482422,
      "learning_rate": 1e-06,
      "loss": 0.0058,
      "num_tokens": 28484262.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.1951705813407898,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 49
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4030.0,
      "completions/mean_length": 694.6563110351562,
      "completions/mean_terminated_length": 576.8267822265625,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 0.46647230320699706,
      "grad_norm": 0.13872234523296356,
      "learning_rate": 1e-06,
      "loss": -0.0054,
      "num_tokens": 29086330.0,
      "reward": 0.527901828289032,
      "reward_std": 0.22931794822216034,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 50
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3270.0,
      "completions/mean_length": 643.7589721679688,
      "completions/mean_terminated_length": 568.9669189453125,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 0.47580174927113705,
      "grad_norm": 0.12871721386909485,
      "learning_rate": 1e-06,
      "loss": 0.0112,
      "num_tokens": 29666522.0,
      "reward": 0.598214328289032,
      "reward_std": 0.21057121455669403,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 51
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3218.0,
      "completions/mean_length": 611.7120971679688,
      "completions/mean_terminated_length": 544.3253784179688,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 0.485131195335277,
      "grad_norm": 0.12275160849094391,
      "learning_rate": 1e-06,
      "loss": 0.0077,
      "num_tokens": 30224672.0,
      "reward": 0.621651828289032,
      "reward_std": 0.2090653032064438,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 52
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3963.0,
      "completions/mean_length": 617.1171875,
      "completions/mean_terminated_length": 549.8350219726562,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 0.4944606413994169,
      "grad_norm": 0.12363526970148087,
      "learning_rate": 1e-06,
      "loss": -0.012,
      "num_tokens": 30793729.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.20000769197940826,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 53
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4037.0,
      "completions/mean_length": 544.0301513671875,
      "completions/mean_terminated_length": 512.0303955078125,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 0.5037900874635568,
      "grad_norm": 0.1251416653394699,
      "learning_rate": 1e-06,
      "loss": 0.0105,
      "num_tokens": 31338252.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.19257515668869019,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 54
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3845.0,
      "completions/mean_length": 544.0301513671875,
      "completions/mean_terminated_length": 516.0618896484375,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 0.5131195335276968,
      "grad_norm": 0.14017212390899658,
      "learning_rate": 1e-06,
      "loss": 0.0266,
      "num_tokens": 31891423.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.21756118535995483,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 55
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2310.0,
      "completions/mean_length": 613.091552734375,
      "completions/mean_terminated_length": 545.7315063476562,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 0.5224489795918368,
      "grad_norm": 0.1301651895046234,
      "learning_rate": 1e-06,
      "loss": 0.0095,
      "num_tokens": 32462529.0,
      "reward": 0.4966517984867096,
      "reward_std": 0.20038999617099762,
      "rewards/verify_math_reward/mean": 0.4966517984867096,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 56
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3428.0,
      "completions/mean_length": 555.2567138671875,
      "completions/mean_terminated_length": 527.3768310546875,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 0.5317784256559767,
      "grad_norm": 0.13453331589698792,
      "learning_rate": 1e-06,
      "loss": 0.0261,
      "num_tokens": 33018119.0,
      "reward": 0.5625,
      "reward_std": 0.19399915635585785,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 57
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3956.0,
      "completions/mean_length": 577.25,
      "completions/mean_terminated_length": 533.51416015625,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 0.5411078717201167,
      "grad_norm": 0.11944576352834702,
      "learning_rate": 1e-06,
      "loss": 0.0122,
      "num_tokens": 33580143.0,
      "reward": 0.566964328289032,
      "reward_std": 0.18291178345680237,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 58
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3082.0,
      "completions/mean_length": 566.3058471679688,
      "completions/mean_terminated_length": 518.3914184570312,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 0.5504373177842565,
      "grad_norm": 0.1279263198375702,
      "learning_rate": 1e-06,
      "loss": 0.0063,
      "num_tokens": 34132545.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.17480847239494324,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924396276473999,
      "step": 59
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3234.0,
      "completions/mean_length": 564.146240234375,
      "completions/mean_terminated_length": 532.3276977539062,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.5597667638483965,
      "grad_norm": 0.12249712646007538,
      "learning_rate": 1e-06,
      "loss": 0.0118,
      "num_tokens": 34690260.0,
      "reward": 0.598214328289032,
      "reward_std": 0.15969882905483246,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053290486335754,
      "step": 60
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2483.0,
      "completions/mean_length": 660.6473388671875,
      "completions/mean_terminated_length": 590.2186889648438,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 0.5690962099125364,
      "grad_norm": 0.11897089332342148,
      "learning_rate": 1e-06,
      "loss": 0.0007,
      "num_tokens": 35292496.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.19846788048744202,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 61
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2778.0,
      "completions/mean_length": 656.9498291015625,
      "completions/mean_terminated_length": 582.4435424804688,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 0.5784256559766764,
      "grad_norm": 0.1298826038837433,
      "learning_rate": 1e-06,
      "loss": 0.0021,
      "num_tokens": 35887267.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.23829957842826843,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608329772949,
      "step": 62
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3655.0,
      "completions/mean_length": 590.6328125,
      "completions/mean_terminated_length": 547.0632934570312,
      "completions/min_length": 37.0,
      "completions/min_terminated_length": 37.0,
      "epoch": 0.5877551020408164,
      "grad_norm": 0.13611359894275665,
      "learning_rate": 1e-06,
      "loss": 0.0117,
      "num_tokens": 36450074.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.1956934630870819,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 63
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4028.0,
      "completions/mean_length": 636.396240234375,
      "completions/mean_terminated_length": 569.4868774414062,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 0.5970845481049563,
      "grad_norm": 0.13279348611831665,
      "learning_rate": 1e-06,
      "loss": 0.0047,
      "num_tokens": 37040421.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.19043126702308655,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 64
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3001.0,
      "completions/mean_length": 611.6652221679688,
      "completions/mean_terminated_length": 576.3111572265625,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 0.6064139941690962,
      "grad_norm": 0.12394732981920242,
      "learning_rate": 1e-06,
      "loss": 0.0235,
      "num_tokens": 37649289.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.18341170251369476,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 65
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3913.0,
      "completions/mean_length": 571.1038208007812,
      "completions/mean_terminated_length": 527.2915649414062,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 0.6157434402332361,
      "grad_norm": 0.13319340348243713,
      "learning_rate": 1e-06,
      "loss": 0.008,
      "num_tokens": 38214478.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.2032838761806488,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161845445632935,
      "step": 66
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3479.0,
      "completions/mean_length": 696.8739013671875,
      "completions/mean_terminated_length": 619.2682495117188,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 0.6250728862973761,
      "grad_norm": 0.11537881940603256,
      "learning_rate": 1e-06,
      "loss": -0.004,
      "num_tokens": 38852629.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.20324109494686127,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.4999600946903229,
      "step": 67
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3630.0,
      "completions/mean_length": 590.625,
      "completions/mean_terminated_length": 555.0574951171875,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 0.634402332361516,
      "grad_norm": 0.12531507015228271,
      "learning_rate": 1e-06,
      "loss": 0.0086,
      "num_tokens": 39428589.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.19242683053016663,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 68
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3846.0,
      "completions/mean_length": 674.203125,
      "completions/mean_terminated_length": 588.0709228515625,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.643731778425656,
      "grad_norm": 0.12898904085159302,
      "learning_rate": 1e-06,
      "loss": 0.0107,
      "num_tokens": 40032931.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.23394668102264404,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 69
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2719.0,
      "completions/mean_length": 611.7890625,
      "completions/mean_terminated_length": 572.4638671875,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 0.6530612244897959,
      "grad_norm": 0.13397973775863647,
      "learning_rate": 1e-06,
      "loss": -0.0002,
      "num_tokens": 40630174.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.2358924299478531,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 70
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3893.0,
      "completions/mean_length": 600.4498291015625,
      "completions/mean_terminated_length": 560.9966430664062,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 0.6623906705539359,
      "grad_norm": 0.13122980296611786,
      "learning_rate": 1e-06,
      "loss": 0.0221,
      "num_tokens": 41226801.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.22053246200084686,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 71
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3377.0,
      "completions/mean_length": 663.4989013671875,
      "completions/mean_terminated_length": 581.1188354492188,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 0.6717201166180758,
      "grad_norm": 0.12445461750030518,
      "learning_rate": 1e-06,
      "loss": -0.0051,
      "num_tokens": 41832720.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.2016706019639969,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 72
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2948.0,
      "completions/mean_length": 617.943115234375,
      "completions/mean_terminated_length": 562.73583984375,
      "completions/min_length": 81.0,
      "completions/min_terminated_length": 81.0,
      "epoch": 0.6810495626822157,
      "grad_norm": 0.11597079783678055,
      "learning_rate": 1e-06,
      "loss": 0.0058,
      "num_tokens": 42425829.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.17191274464130402,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 73
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3584.0,
      "completions/mean_length": 697.904052734375,
      "completions/mean_terminated_length": 596.3517456054688,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 0.6903790087463557,
      "grad_norm": 0.11661992222070694,
      "learning_rate": 1e-06,
      "loss": -0.0,
      "num_tokens": 43034359.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.20861980319023132,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689778685569763,
      "step": 74
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3494.0,
      "completions/mean_length": 652.15625,
      "completions/mean_terminated_length": 577.546142578125,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 0.6997084548104956,
      "grad_norm": 0.11767545342445374,
      "learning_rate": 1e-06,
      "loss": -0.0002,
      "num_tokens": 43625931.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.18878154456615448,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756613254547,
      "step": 75
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2363.0,
      "completions/mean_length": 672.21875,
      "completions/mean_terminated_length": 594.0502319335938,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 0.7090379008746356,
      "grad_norm": 0.13513846695423126,
      "learning_rate": 1e-06,
      "loss": 0.011,
      "num_tokens": 44239559.0,
      "reward": 0.535714328289032,
      "reward_std": 0.2354377806186676,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 76
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2531.0,
      "completions/mean_length": 601.7366333007812,
      "completions/mean_terminated_length": 542.242919921875,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 0.7183673469387755,
      "grad_norm": 0.14092093706130981,
      "learning_rate": 1e-06,
      "loss": -0.004,
      "num_tokens": 44808899.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.23581615090370178,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689781665802,
      "step": 77
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3195.0,
      "completions/mean_length": 643.294677734375,
      "completions/mean_terminated_length": 564.4657592773438,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 0.7276967930029155,
      "grad_norm": 0.13623066246509552,
      "learning_rate": 1e-06,
      "loss": 0.021,
      "num_tokens": 45389283.0,
      "reward": 0.5770089626312256,
      "reward_std": 0.2100435197353363,
      "rewards/verify_math_reward/mean": 0.5770089030265808,
      "rewards/verify_math_reward/std": 0.4943099319934845,
      "step": 78
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4059.0,
      "completions/mean_length": 576.1495971679688,
      "completions/mean_terminated_length": 540.4351196289062,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 0.7370262390670554,
      "grad_norm": 0.126764714717865,
      "learning_rate": 1e-06,
      "loss": 0.0076,
      "num_tokens": 45957737.0,
      "reward": 0.590401828289032,
      "reward_std": 0.20973819494247437,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 79
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2471.0,
      "completions/mean_length": 566.1886596679688,
      "completions/mean_terminated_length": 522.3152465820312,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 0.7463556851311953,
      "grad_norm": 0.1301468461751938,
      "learning_rate": 1e-06,
      "loss": -0.0019,
      "num_tokens": 46510034.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.2131231427192688,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 80
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3503.0,
      "completions/mean_length": 674.513427734375,
      "completions/mean_terminated_length": 600.3876953125,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 0.7556851311953353,
      "grad_norm": 0.1248849481344223,
      "learning_rate": 1e-06,
      "loss": 0.0033,
      "num_tokens": 47132390.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.19696861505508423,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689778685569763,
      "step": 81
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3460.0,
      "completions/mean_length": 651.6629638671875,
      "completions/mean_terminated_length": 560.918701171875,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 0.7650145772594752,
      "grad_norm": 0.12517298758029938,
      "learning_rate": 1e-06,
      "loss": 0.0125,
      "num_tokens": 47716240.0,
      "reward": 0.546875,
      "reward_std": 0.18370595574378967,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 82
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2290.0,
      "completions/mean_length": 594.9397583007812,
      "completions/mean_terminated_length": 563.398681640625,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 0.7743440233236152,
      "grad_norm": 0.10509128123521805,
      "learning_rate": 1e-06,
      "loss": -0.0038,
      "num_tokens": 48301754.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.16037102043628693,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 83
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3104.0,
      "completions/mean_length": 653.5301513671875,
      "completions/mean_terminated_length": 570.9108276367188,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 0.7836734693877551,
      "grad_norm": 0.12554019689559937,
      "learning_rate": 1e-06,
      "loss": -0.0036,
      "num_tokens": 48896917.0,
      "reward": 0.4899553656578064,
      "reward_std": 0.21440306305885315,
      "rewards/verify_math_reward/mean": 0.4899553656578064,
      "rewards/verify_math_reward/std": 0.5001782774925232,
      "step": 84
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3494.0,
      "completions/mean_length": 586.4754638671875,
      "completions/mean_terminated_length": 550.8657836914062,
      "completions/min_length": 77.0,
      "completions/min_terminated_length": 77.0,
      "epoch": 0.793002915451895,
      "grad_norm": 0.12394072115421295,
      "learning_rate": 1e-06,
      "loss": 0.0059,
      "num_tokens": 49475263.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.21271198987960815,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 85
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2100.0,
      "completions/mean_length": 582.677490234375,
      "completions/mean_terminated_length": 555.0135498046875,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 0.8023323615160349,
      "grad_norm": 0.1227576732635498,
      "learning_rate": 1e-06,
      "loss": -0.0033,
      "num_tokens": 50055534.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.1935526728630066,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 86
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3584.0,
      "completions/mean_length": 563.5546875,
      "completions/mean_terminated_length": 519.6486206054688,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 0.8116618075801749,
      "grad_norm": 0.13397268950939178,
      "learning_rate": 1e-06,
      "loss": 0.0165,
      "num_tokens": 50596087.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.18411780893802643,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 87
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3707.0,
      "completions/mean_length": 590.1886596679688,
      "completions/mean_terminated_length": 538.5741577148438,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 0.8209912536443149,
      "grad_norm": 0.12742318212985992,
      "learning_rate": 1e-06,
      "loss": 0.0228,
      "num_tokens": 51151720.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.1874246895313263,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 88
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2388.0,
      "completions/mean_length": 634.989990234375,
      "completions/mean_terminated_length": 580.0532836914062,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 0.8303206997084548,
      "grad_norm": 0.11321911215782166,
      "learning_rate": 1e-06,
      "loss": 0.0106,
      "num_tokens": 51765175.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.1803976595401764,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 89
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4055.0,
      "completions/mean_length": 622.1741333007812,
      "completions/mean_terminated_length": 578.9966430664062,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.8396501457725948,
      "grad_norm": 0.11763538420200348,
      "learning_rate": 1e-06,
      "loss": 0.012,
      "num_tokens": 52365139.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.1898646205663681,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 90
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3428.0,
      "completions/mean_length": 649.9017944335938,
      "completions/mean_terminated_length": 559.1111450195312,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 0.8489795918367347,
      "grad_norm": 0.13605010509490967,
      "learning_rate": 1e-06,
      "loss": 0.0007,
      "num_tokens": 52941203.0,
      "reward": 0.6015625,
      "reward_std": 0.2129797637462616,
      "rewards/verify_math_reward/mean": 0.6015625,
      "rewards/verify_math_reward/std": 0.48984986543655396,
      "step": 91
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2929.0,
      "completions/mean_length": 624.7131958007812,
      "completions/mean_terminated_length": 581.5672607421875,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 0.8583090379008746,
      "grad_norm": 0.12498349696397781,
      "learning_rate": 1e-06,
      "loss": 0.0055,
      "num_tokens": 53542970.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.2092207968235016,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 92
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3373.0,
      "completions/mean_length": 602.7600708007812,
      "completions/mean_terminated_length": 547.3118286132812,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 0.8676384839650145,
      "grad_norm": 0.1420130878686905,
      "learning_rate": 1e-06,
      "loss": 0.0202,
      "num_tokens": 54123235.0,
      "reward": 0.590401828289032,
      "reward_std": 0.2405879944562912,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 93
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3199.0,
      "completions/mean_length": 605.966552734375,
      "completions/mean_terminated_length": 574.5247802734375,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 0.8769679300291545,
      "grad_norm": 0.1178143247961998,
      "learning_rate": 1e-06,
      "loss": 0.0067,
      "num_tokens": 54710165.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.1704389452934265,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938119411468506,
      "step": 94
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3513.0,
      "completions/mean_length": 654.724365234375,
      "completions/mean_terminated_length": 600.1008911132812,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 0.8862973760932945,
      "grad_norm": 0.12943097949028015,
      "learning_rate": 1e-06,
      "loss": -0.0036,
      "num_tokens": 55326950.0,
      "reward": 0.5457589626312256,
      "reward_std": 0.22068330645561218,
      "rewards/verify_math_reward/mean": 0.5457589030265808,
      "rewards/verify_math_reward/std": 0.4981798231601715,
      "step": 95
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2576.0,
      "completions/mean_length": 638.4888916015625,
      "completions/mean_terminated_length": 583.6077270507812,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 0.8956268221574344,
      "grad_norm": 0.13328658044338226,
      "learning_rate": 1e-06,
      "loss": 0.007,
      "num_tokens": 55925148.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.23788981139659882,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 96
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2864.0,
      "completions/mean_length": 639.8125,
      "completions/mean_terminated_length": 572.96923828125,
      "completions/min_length": 97.0,
      "completions/min_terminated_length": 97.0,
      "epoch": 0.9049562682215744,
      "grad_norm": 0.13662846386432648,
      "learning_rate": 1e-06,
      "loss": 0.0088,
      "num_tokens": 56520084.0,
      "reward": 0.559151828289032,
      "reward_std": 0.22278834879398346,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 97
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4074.0,
      "completions/mean_length": 695.6563110351562,
      "completions/mean_terminated_length": 637.7616577148438,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 0.9142857142857143,
      "grad_norm": 0.10658743232488632,
      "learning_rate": 1e-06,
      "loss": 0.0127,
      "num_tokens": 57167880.0,
      "reward": 0.4966517984867096,
      "reward_std": 0.17559011280536652,
      "rewards/verify_math_reward/mean": 0.4966517984867096,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 98
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3235.0,
      "completions/mean_length": 671.685302734375,
      "completions/mean_terminated_length": 581.468505859375,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 0.9236151603498542,
      "grad_norm": 0.12809064984321594,
      "learning_rate": 1e-06,
      "loss": 0.0032,
      "num_tokens": 57774718.0,
      "reward": 0.5022321939468384,
      "reward_std": 0.21432848274707794,
      "rewards/verify_math_reward/mean": 0.5022321343421936,
      "rewards/verify_math_reward/std": 0.5002743005752563,
      "step": 99
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3224.0,
      "completions/mean_length": 675.546875,
      "completions/mean_terminated_length": 577.370849609375,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 0.9329446064139941,
      "grad_norm": 0.12401802837848663,
      "learning_rate": 1e-06,
      "loss": -0.0041,
      "num_tokens": 58367872.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.19261088967323303,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 100
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3526.0,
      "completions/mean_length": 658.6004638671875,
      "completions/mean_terminated_length": 580.1209716796875,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 0.9422740524781341,
      "grad_norm": 0.10858191549777985,
      "learning_rate": 1e-06,
      "loss": 0.0103,
      "num_tokens": 58961578.0,
      "reward": 0.566964328289032,
      "reward_std": 0.17187067866325378,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 101
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3564.0,
      "completions/mean_length": 645.5279541015625,
      "completions/mean_terminated_length": 582.7920532226562,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 0.9516034985422741,
      "grad_norm": 0.11887253820896149,
      "learning_rate": 1e-06,
      "loss": 0.0105,
      "num_tokens": 59574347.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.17633940279483795,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 102
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4019.0,
      "completions/mean_length": 734.6295166015625,
      "completions/mean_terminated_length": 650.0182495117188,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 0.960932944606414,
      "grad_norm": 0.11316835135221481,
      "learning_rate": 1e-06,
      "loss": 0.0076,
      "num_tokens": 60229927.0,
      "reward": 0.5256696939468384,
      "reward_std": 0.17341090738773346,
      "rewards/verify_math_reward/mean": 0.5256696343421936,
      "rewards/verify_math_reward/std": 0.4996195137500763,
      "step": 103
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1838.0,
      "completions/mean_length": 612.4420166015625,
      "completions/mean_terminated_length": 557.1473999023438,
      "completions/min_length": 86.0,
      "completions/min_terminated_length": 86.0,
      "epoch": 0.970262390670554,
      "grad_norm": 0.14979751408100128,
      "learning_rate": 1e-06,
      "loss": -0.0068,
      "num_tokens": 60814275.0,
      "reward": 0.582589328289032,
      "reward_std": 0.23582008481025696,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.493407279253006,
      "step": 104
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4081.0,
      "completions/mean_length": 603.3671875,
      "completions/mean_terminated_length": 543.9012451171875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.9795918367346939,
      "grad_norm": 0.12644144892692566,
      "learning_rate": 1e-06,
      "loss": -0.0001,
      "num_tokens": 61381348.0,
      "reward": 0.527901828289032,
      "reward_std": 0.20850147306919098,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 105
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1928.0,
      "completions/mean_length": 615.6998291015625,
      "completions/mean_terminated_length": 552.4215698242188,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 0.9889212827988338,
      "grad_norm": 0.12566089630126953,
      "learning_rate": 1e-06,
      "loss": -0.0041,
      "num_tokens": 61949671.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.18565760552883148,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 106
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008522727272727293,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3541.0,
      "completions/mean_length": 548.3125,
      "completions/mean_terminated_length": 517.816650390625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 0.9982507288629737,
      "grad_norm": 0.13114283978939056,
      "learning_rate": 1e-06,
      "loss": 0.0255,
      "num_tokens": 62530659.0,
      "reward": 0.559151828289032,
      "reward_std": 0.2181578129529953,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 107
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3522.0,
      "completions/mean_length": 603.1596069335938,
      "completions/mean_terminated_length": 583.5589599609375,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 1.00932944606414,
      "grad_norm": 0.1275864988565445,
      "learning_rate": 1e-06,
      "loss": 0.024,
      "num_tokens": 63141034.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.2159428894519806,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 108
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3996.0,
      "completions/mean_length": 634.6194458007812,
      "completions/mean_terminated_length": 563.6572265625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 1.01865889212828,
      "grad_norm": 0.13994581997394562,
      "learning_rate": 1e-06,
      "loss": 0.0099,
      "num_tokens": 63732373.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.21012048423290253,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 109
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2561.0,
      "completions/mean_length": 627.6272583007812,
      "completions/mean_terminated_length": 576.56396484375,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 1.0279883381924197,
      "grad_norm": 0.12112820893526077,
      "learning_rate": 1e-06,
      "loss": 0.0016,
      "num_tokens": 64334479.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.2135361284017563,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 110
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2995.0,
      "completions/mean_length": 633.6339721679688,
      "completions/mean_terminated_length": 558.62255859375,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 1.0373177842565597,
      "grad_norm": 0.11971090734004974,
      "learning_rate": 1e-06,
      "loss": -0.0065,
      "num_tokens": 64928767.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.17792311310768127,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 111
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4014.0,
      "completions/mean_length": 585.0714721679688,
      "completions/mean_terminated_length": 549.4475708007812,
      "completions/min_length": 94.0,
      "completions/min_terminated_length": 94.0,
      "epoch": 1.0466472303206997,
      "grad_norm": 0.14180590212345123,
      "learning_rate": 1e-06,
      "loss": -0.0017,
      "num_tokens": 65510703.0,
      "reward": 0.598214328289032,
      "reward_std": 0.2285270094871521,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 112
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3056.0,
      "completions/mean_length": 645.7053833007812,
      "completions/mean_terminated_length": 578.97607421875,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 1.0559766763848397,
      "grad_norm": 0.11551226675510406,
      "learning_rate": 1e-06,
      "loss": -0.0045,
      "num_tokens": 66110255.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.19425876438617706,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763562679291,
      "step": 113
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3007.0,
      "completions/mean_length": 666.7623291015625,
      "completions/mean_terminated_length": 564.279296875,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 1.0653061224489795,
      "grad_norm": 0.12090034782886505,
      "learning_rate": 1e-06,
      "loss": 0.0016,
      "num_tokens": 66712626.0,
      "reward": 0.5234375,
      "reward_std": 0.17652484774589539,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 114
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4093.0,
      "completions/mean_length": 633.575927734375,
      "completions/mean_terminated_length": 558.5632934570312,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 1.0746355685131195,
      "grad_norm": 0.12432066351175308,
      "learning_rate": 1e-06,
      "loss": 0.022,
      "num_tokens": 67290398.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.1777704507112503,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 115
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3250.0,
      "completions/mean_length": 621.9553833007812,
      "completions/mean_terminated_length": 562.805908203125,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 1.0839650145772595,
      "grad_norm": 0.1291705071926117,
      "learning_rate": 1e-06,
      "loss": 0.0068,
      "num_tokens": 67874070.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.19227458536624908,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909069061279297,
      "step": 116
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3373.0,
      "completions/mean_length": 586.6328125,
      "completions/mean_terminated_length": 559.0,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 1.0932944606413995,
      "grad_norm": 0.13343921303749084,
      "learning_rate": 1e-06,
      "loss": 0.0146,
      "num_tokens": 68456141.0,
      "reward": 0.5770089626312256,
      "reward_std": 0.21312315762043,
      "rewards/verify_math_reward/mean": 0.5770089030265808,
      "rewards/verify_math_reward/std": 0.4943099319934845,
      "step": 117
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2638.0,
      "completions/mean_length": 611.771240234375,
      "completions/mean_terminated_length": 560.4744873046875,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 1.1026239067055394,
      "grad_norm": 0.1323866993188858,
      "learning_rate": 1e-06,
      "loss": 0.0177,
      "num_tokens": 69025392.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.21440446376800537,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.48468026518821716,
      "step": 118
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3523.0,
      "completions/mean_length": 611.7801513671875,
      "completions/mean_terminated_length": 548.4306640625,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 1.1119533527696792,
      "grad_norm": 0.13896460831165314,
      "learning_rate": 1e-06,
      "loss": -0.0015,
      "num_tokens": 69581635.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.20760175585746765,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 119
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2819.0,
      "completions/mean_length": 638.6317138671875,
      "completions/mean_terminated_length": 575.7704467773438,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 1.1212827988338192,
      "grad_norm": 0.13054411113262177,
      "learning_rate": 1e-06,
      "loss": 0.0059,
      "num_tokens": 70184953.0,
      "reward": 0.5703125,
      "reward_std": 0.2216501384973526,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 120
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3805.0,
      "completions/mean_length": 637.3147583007812,
      "completions/mean_terminated_length": 582.4149780273438,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 1.1306122448979592,
      "grad_norm": 0.13472609221935272,
      "learning_rate": 1e-06,
      "loss": 0.0215,
      "num_tokens": 70795843.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.2102688103914261,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 121
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2207.0,
      "completions/mean_length": 612.4185791015625,
      "completions/mean_terminated_length": 561.13134765625,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 1.1399416909620992,
      "grad_norm": 0.11735182255506516,
      "learning_rate": 1e-06,
      "loss": 0.011,
      "num_tokens": 71369890.0,
      "reward": 0.625,
      "reward_std": 0.1767519861459732,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 122
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2966.0,
      "completions/mean_length": 681.1607666015625,
      "completions/mean_terminated_length": 579.1080322265625,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 1.149271137026239,
      "grad_norm": 0.13733011484146118,
      "learning_rate": 1e-06,
      "loss": 0.0132,
      "num_tokens": 71963802.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.23228992521762848,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924395978450775,
      "step": 123
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3791.0,
      "completions/mean_length": 597.818115234375,
      "completions/mean_terminated_length": 566.3029174804688,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 1.158600583090379,
      "grad_norm": 0.12670235335826874,
      "learning_rate": 1e-06,
      "loss": 0.0112,
      "num_tokens": 72556191.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.17949683964252472,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791128396987915,
      "step": 124
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2245.0,
      "completions/mean_length": 664.1272583007812,
      "completions/mean_terminated_length": 585.77392578125,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 1.167930029154519,
      "grad_norm": 0.10632211714982986,
      "learning_rate": 1e-06,
      "loss": -0.007,
      "num_tokens": 73159673.0,
      "reward": 0.4854910969734192,
      "reward_std": 0.1410633772611618,
      "rewards/verify_math_reward/mean": 0.4854910671710968,
      "rewards/verify_math_reward/std": 0.5000686049461365,
      "step": 125
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2799.0,
      "completions/mean_length": 583.8627319335938,
      "completions/mean_terminated_length": 552.2218627929688,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 1.177259475218659,
      "grad_norm": 0.1307123452425003,
      "learning_rate": 1e-06,
      "loss": 0.0067,
      "num_tokens": 73738526.0,
      "reward": 0.598214328289032,
      "reward_std": 0.18712298572063446,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 126
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2390.0,
      "completions/mean_length": 627.5111694335938,
      "completions/mean_terminated_length": 552.3671264648438,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 1.186588921282799,
      "grad_norm": 0.126788929104805,
      "learning_rate": 1e-06,
      "loss": 0.008,
      "num_tokens": 74305552.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.18539589643478394,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.48468026518821716,
      "step": 127
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2035.0,
      "completions/mean_length": 620.9576416015625,
      "completions/mean_terminated_length": 565.7982177734375,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 1.1959183673469387,
      "grad_norm": 0.13120092451572418,
      "learning_rate": 1e-06,
      "loss": 0.0231,
      "num_tokens": 74896506.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.20354419946670532,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 128
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2196.0,
      "completions/mean_length": 628.6295166015625,
      "completions/mean_terminated_length": 565.5863647460938,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 1.2052478134110787,
      "grad_norm": 0.13632048666477203,
      "learning_rate": 1e-06,
      "loss": 0.0155,
      "num_tokens": 75487310.0,
      "reward": 0.559151828289032,
      "reward_std": 0.2307412326335907,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 129
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3733.0,
      "completions/mean_length": 681.984375,
      "completions/mean_terminated_length": 604.0387573242188,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 1.2145772594752187,
      "grad_norm": 0.14439013600349426,
      "learning_rate": 1e-06,
      "loss": 0.007,
      "num_tokens": 76109592.0,
      "reward": 0.5167410969734192,
      "reward_std": 0.23601117730140686,
      "rewards/verify_math_reward/mean": 0.5167410969734192,
      "rewards/verify_math_reward/std": 0.4999987483024597,
      "step": 130
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3954.0,
      "completions/mean_length": 575.3717041015625,
      "completions/mean_terminated_length": 527.580322265625,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 1.2239067055393587,
      "grad_norm": 0.12474658340215683,
      "learning_rate": 1e-06,
      "loss": 0.0298,
      "num_tokens": 76661125.0,
      "reward": 0.625,
      "reward_std": 0.1924593597650528,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 131
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3872.0,
      "completions/mean_length": 568.6439819335938,
      "completions/mean_terminated_length": 528.8318481445312,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 1.2332361516034984,
      "grad_norm": 0.13131001591682434,
      "learning_rate": 1e-06,
      "loss": -0.0006,
      "num_tokens": 77215166.0,
      "reward": 0.598214328289032,
      "reward_std": 0.17836818099021912,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053290486335754,
      "step": 132
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3578.0,
      "completions/mean_length": 632.4017944335938,
      "completions/mean_terminated_length": 553.3241577148438,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 1.2425655976676384,
      "grad_norm": 0.11237182468175888,
      "learning_rate": 1e-06,
      "loss": 0.0118,
      "num_tokens": 77790750.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.16529689729213715,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 133
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3082.0,
      "completions/mean_length": 606.552490234375,
      "completions/mean_terminated_length": 559.1843872070312,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 1.2518950437317784,
      "grad_norm": 0.13661354780197144,
      "learning_rate": 1e-06,
      "loss": 0.0068,
      "num_tokens": 78366333.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.23499269783496857,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 134
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2681.0,
      "completions/mean_length": 581.9185791015625,
      "completions/mean_terminated_length": 522.08740234375,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 1.2612244897959184,
      "grad_norm": 0.14530333876609802,
      "learning_rate": 1e-06,
      "loss": -0.0065,
      "num_tokens": 78910308.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.2191763073205948,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 135
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3982.0,
      "completions/mean_length": 599.6875,
      "completions/mean_terminated_length": 544.1904907226562,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 1.2705539358600584,
      "grad_norm": 0.1232718899846077,
      "learning_rate": 1e-06,
      "loss": 0.0086,
      "num_tokens": 79481884.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.18761266767978668,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 136
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2723.0,
      "completions/mean_length": 619.9185791015625,
      "completions/mean_terminated_length": 548.6549072265625,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 1.2798833819241984,
      "grad_norm": 0.12724876403808594,
      "learning_rate": 1e-06,
      "loss": 0.0119,
      "num_tokens": 80049827.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.17885534465312958,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263837933540344,
      "step": 137
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3823.0,
      "completions/mean_length": 608.0346069335938,
      "completions/mean_terminated_length": 564.681396484375,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 1.2892128279883381,
      "grad_norm": 0.12700584530830383,
      "learning_rate": 1e-06,
      "loss": 0.0174,
      "num_tokens": 80641882.0,
      "reward": 0.5703125,
      "reward_std": 0.1734876036643982,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 138
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3850.0,
      "completions/mean_length": 638.7388916015625,
      "completions/mean_terminated_length": 579.8751831054688,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 1.2985422740524781,
      "grad_norm": 0.1467692255973816,
      "learning_rate": 1e-06,
      "loss": 0.0294,
      "num_tokens": 81248536.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.2576557397842407,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 139
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2631.0,
      "completions/mean_length": 651.279052734375,
      "completions/mean_terminated_length": 576.64990234375,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 1.3078717201166181,
      "grad_norm": 0.13139992952346802,
      "learning_rate": 1e-06,
      "loss": -0.0033,
      "num_tokens": 81847314.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.19381622970104218,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 140
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3093.0,
      "completions/mean_length": 652.3225708007812,
      "completions/mean_terminated_length": 581.7232666015625,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 1.3172011661807579,
      "grad_norm": 0.13410133123397827,
      "learning_rate": 1e-06,
      "loss": 0.0038,
      "num_tokens": 82451995.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.21913281083106995,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608329772949,
      "step": 141
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3262.0,
      "completions/mean_length": 647.2578125,
      "completions/mean_terminated_length": 584.5534057617188,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 1.3265306122448979,
      "grad_norm": 0.1401214450597763,
      "learning_rate": 1e-06,
      "loss": 0.0033,
      "num_tokens": 83059810.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.21963205933570862,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 142
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2717.0,
      "completions/mean_length": 675.0245971679688,
      "completions/mean_terminated_length": 572.7885131835938,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 1.3358600583090379,
      "grad_norm": 0.12698782980442047,
      "learning_rate": 1e-06,
      "loss": -0.0047,
      "num_tokens": 83649704.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.1937370002269745,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 143
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4035.0,
      "completions/mean_length": 700.0234985351562,
      "completions/mean_terminated_length": 618.52001953125,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 1.3451895043731779,
      "grad_norm": 0.11701101064682007,
      "learning_rate": 1e-06,
      "loss": -0.0078,
      "num_tokens": 84287125.0,
      "reward": 0.5234375,
      "reward_std": 0.2080129235982895,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 144
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3662.0,
      "completions/mean_length": 635.4364013671875,
      "completions/mean_terminated_length": 540.1914672851562,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 1.3545189504373178,
      "grad_norm": 0.13555040955543518,
      "learning_rate": 1e-06,
      "loss": 0.0045,
      "num_tokens": 84844828.0,
      "reward": 0.629464328289032,
      "reward_std": 0.2072654515504837,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 145
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2974.0,
      "completions/mean_length": 657.4631958007812,
      "completions/mean_terminated_length": 570.9096069335938,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 1.3638483965014578,
      "grad_norm": 0.1325100064277649,
      "learning_rate": 1e-06,
      "loss": 0.0042,
      "num_tokens": 85447187.0,
      "reward": 0.5625,
      "reward_std": 0.20921938121318817,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 146
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3577.0,
      "completions/mean_length": 678.9074096679688,
      "completions/mean_terminated_length": 576.787353515625,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 1.3731778425655976,
      "grad_norm": 0.13249272108078003,
      "learning_rate": 1e-06,
      "loss": 0.0106,
      "num_tokens": 86036600.0,
      "reward": 0.5390625,
      "reward_std": 0.2029387205839157,
      "rewards/verify_math_reward/mean": 0.5390625,
      "rewards/verify_math_reward/std": 0.4987502098083496,
      "step": 147
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3334.0,
      "completions/mean_length": 663.9330444335938,
      "completions/mean_terminated_length": 589.5780639648438,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 1.3825072886297376,
      "grad_norm": 0.1341022551059723,
      "learning_rate": 1e-06,
      "loss": 0.0189,
      "num_tokens": 86637108.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.24502448737621307,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 148
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2786.0,
      "completions/mean_length": 613.3303833007812,
      "completions/mean_terminated_length": 533.8173217773438,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 1.3918367346938776,
      "grad_norm": 0.1319631189107895,
      "learning_rate": 1e-06,
      "loss": 0.0151,
      "num_tokens": 87189356.0,
      "reward": 0.582589328289032,
      "reward_std": 0.1983163207769394,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.4934072494506836,
      "step": 149
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2903.0,
      "completions/mean_length": 597.7578125,
      "completions/mean_terminated_length": 550.2703857421875,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 1.4011661807580174,
      "grad_norm": 0.11776057630777359,
      "learning_rate": 1e-06,
      "loss": -0.0026,
      "num_tokens": 87772051.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.16224895417690277,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 150
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3784.0,
      "completions/mean_length": 643.935302734375,
      "completions/mean_terminated_length": 569.1470947265625,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 1.4104956268221573,
      "grad_norm": 0.125766783952713,
      "learning_rate": 1e-06,
      "loss": 0.0211,
      "num_tokens": 88368393.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.1846805065870285,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 151
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2478.0,
      "completions/mean_length": 611.4464721679688,
      "completions/mean_terminated_length": 531.890380859375,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 1.4198250728862973,
      "grad_norm": 0.14089730381965637,
      "learning_rate": 1e-06,
      "loss": 0.0136,
      "num_tokens": 88918809.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.20252712070941925,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 152
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2443.0,
      "completions/mean_length": 591.8225708007812,
      "completions/mean_terminated_length": 544.2545776367188,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 1.4291545189504373,
      "grad_norm": 0.12657928466796875,
      "learning_rate": 1e-06,
      "loss": -0.0097,
      "num_tokens": 89489154.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.18768639862537384,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 153
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2491.0,
      "completions/mean_length": 605.3995971679688,
      "completions/mean_terminated_length": 541.93408203125,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 1.4384839650145773,
      "grad_norm": 0.11196082085371017,
      "learning_rate": 1e-06,
      "loss": 0.0126,
      "num_tokens": 90048584.0,
      "reward": 0.613839328289032,
      "reward_std": 0.1641671359539032,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 154
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4092.0,
      "completions/mean_length": 565.0457763671875,
      "completions/mean_terminated_length": 521.158203125,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 1.4478134110787173,
      "grad_norm": 0.1285495012998581,
      "learning_rate": 1e-06,
      "loss": 0.006,
      "num_tokens": 90594137.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.19358547031879425,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975656390190125,
      "step": 155
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4058.0,
      "completions/mean_length": 587.7210083007812,
      "completions/mean_terminated_length": 540.0972900390625,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 1.457142857142857,
      "grad_norm": 0.1286974847316742,
      "learning_rate": 1e-06,
      "loss": 0.0104,
      "num_tokens": 91160887.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.20106007158756256,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 156
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3553.0,
      "completions/mean_length": 668.7020263671875,
      "completions/mean_terminated_length": 594.4503784179688,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 1.466472303206997,
      "grad_norm": 0.1116761788725853,
      "learning_rate": 1e-06,
      "loss": -0.0036,
      "num_tokens": 91773724.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.18829120695590973,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 157
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3258.0,
      "completions/mean_length": 657.099365234375,
      "completions/mean_terminated_length": 578.5855712890625,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 1.475801749271137,
      "grad_norm": 0.12376651912927628,
      "learning_rate": 1e-06,
      "loss": 0.008,
      "num_tokens": 92371421.0,
      "reward": 0.5546875,
      "reward_std": 0.1896056979894638,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 158
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3460.0,
      "completions/mean_length": 658.6517944335938,
      "completions/mean_terminated_length": 584.1824340820312,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 1.485131195335277,
      "grad_norm": 0.12960033118724823,
      "learning_rate": 1e-06,
      "loss": -0.0033,
      "num_tokens": 92962293.0,
      "reward": 0.598214328289032,
      "reward_std": 0.18077604472637177,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 159
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3950.0,
      "completions/mean_length": 625.9453125,
      "completions/mean_terminated_length": 582.814697265625,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 1.4944606413994168,
      "grad_norm": 0.11636026948690414,
      "learning_rate": 1e-06,
      "loss": 0.0016,
      "num_tokens": 93568188.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.19178421795368195,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.4876568913459778,
      "step": 160
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3769.0,
      "completions/mean_length": 677.3683471679688,
      "completions/mean_terminated_length": 583.2775268554688,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 1.5037900874635568,
      "grad_norm": 0.12696631252765656,
      "learning_rate": 1e-06,
      "loss": -0.0043,
      "num_tokens": 94159198.0,
      "reward": 0.606026828289032,
      "reward_std": 0.20361904799938202,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890191316604614,
      "step": 161
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3322.0,
      "completions/mean_length": 651.8348388671875,
      "completions/mean_terminated_length": 577.2177734375,
      "completions/min_length": 84.0,
      "completions/min_terminated_length": 84.0,
      "epoch": 1.5131195335276968,
      "grad_norm": 0.12342068552970886,
      "learning_rate": 1e-06,
      "loss": 0.0034,
      "num_tokens": 94760858.0,
      "reward": 0.543526828289032,
      "reward_std": 0.19497555494308472,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 162
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2867.0,
      "completions/mean_length": 660.2600708007812,
      "completions/mean_terminated_length": 577.8023071289062,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 1.5224489795918368,
      "grad_norm": 0.12338287383317947,
      "learning_rate": 1e-06,
      "loss": 0.0085,
      "num_tokens": 95368051.0,
      "reward": 0.5078125,
      "reward_std": 0.2057993859052658,
      "rewards/verify_math_reward/mean": 0.5078125,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 163
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2217.0,
      "completions/mean_length": 606.5949096679688,
      "completions/mean_terminated_length": 539.1091918945312,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 1.5317784256559768,
      "grad_norm": 0.11960810422897339,
      "learning_rate": 1e-06,
      "loss": 0.0062,
      "num_tokens": 95935416.0,
      "reward": 0.5625,
      "reward_std": 0.1624344140291214,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 164
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2296.0,
      "completions/mean_length": 649.0111694335938,
      "completions/mean_terminated_length": 594.2970581054688,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 1.5411078717201168,
      "grad_norm": 0.12979981303215027,
      "learning_rate": 1e-06,
      "loss": 0.0009,
      "num_tokens": 96551474.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.1957344114780426,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 165
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3962.0,
      "completions/mean_length": 561.068115234375,
      "completions/mean_terminated_length": 521.1704711914062,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 1.5504373177842565,
      "grad_norm": 0.12584321200847626,
      "learning_rate": 1e-06,
      "loss": 0.0014,
      "num_tokens": 97106791.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.16904886066913605,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 166
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2662.0,
      "completions/mean_length": 604.646240234375,
      "completions/mean_terminated_length": 557.2522583007812,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 1.5597667638483965,
      "grad_norm": 0.13442009687423706,
      "learning_rate": 1e-06,
      "loss": 0.0044,
      "num_tokens": 97682306.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.22616049647331238,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.4884119927883148,
      "step": 167
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4054.0,
      "completions/mean_length": 690.2366333007812,
      "completions/mean_terminated_length": 612.4794311523438,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 1.5690962099125363,
      "grad_norm": 0.10918771475553513,
      "learning_rate": 1e-06,
      "loss": -0.0092,
      "num_tokens": 98303182.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.16732457280158997,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 168
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3861.0,
      "completions/mean_length": 645.9799194335938,
      "completions/mean_terminated_length": 559.1372680664062,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 1.5784256559766763,
      "grad_norm": 0.13866642117500305,
      "learning_rate": 1e-06,
      "loss": -0.0091,
      "num_tokens": 98879668.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.20290479063987732,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 169
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3954.0,
      "completions/mean_length": 650.765625,
      "completions/mean_terminated_length": 596.079345703125,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 1.5877551020408163,
      "grad_norm": 0.12869887053966522,
      "learning_rate": 1e-06,
      "loss": 0.0034,
      "num_tokens": 99494578.0,
      "reward": 0.609375,
      "reward_std": 0.2069612294435501,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 170
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3807.0,
      "completions/mean_length": 655.984375,
      "completions/mean_terminated_length": 573.4240112304688,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 1.5970845481049563,
      "grad_norm": 0.1265682876110077,
      "learning_rate": 1e-06,
      "loss": -0.0008,
      "num_tokens": 100087412.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.20218871533870697,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 171
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3613.0,
      "completions/mean_length": 645.5424194335938,
      "completions/mean_terminated_length": 590.7732543945312,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 1.6064139941690962,
      "grad_norm": 0.1304820328950882,
      "learning_rate": 1e-06,
      "loss": 0.0132,
      "num_tokens": 100700226.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.22792786359786987,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 172
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3721.0,
      "completions/mean_length": 650.1428833007812,
      "completions/mean_terminated_length": 555.302734375,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 1.6157434402332362,
      "grad_norm": 0.13903263211250305,
      "learning_rate": 1e-06,
      "loss": -0.0067,
      "num_tokens": 101264898.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.20933659374713898,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 173
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3639.0,
      "completions/mean_length": 602.0435791015625,
      "completions/mean_terminated_length": 546.5839233398438,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 1.6250728862973762,
      "grad_norm": 0.1289028376340866,
      "learning_rate": 1e-06,
      "loss": 0.0141,
      "num_tokens": 101837297.0,
      "reward": 0.59375,
      "reward_std": 0.18400652706623077,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 174
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3603.0,
      "completions/mean_length": 646.989990234375,
      "completions/mean_terminated_length": 592.2437744140625,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 1.634402332361516,
      "grad_norm": 0.11323468387126923,
      "learning_rate": 1e-06,
      "loss": 0.0092,
      "num_tokens": 102468024.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.17284639179706573,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 175
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2894.0,
      "completions/mean_length": 644.4330444335938,
      "completions/mean_terminated_length": 601.5322265625,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 1.643731778425656,
      "grad_norm": 0.1305655539035797,
      "learning_rate": 1e-06,
      "loss": 0.0017,
      "num_tokens": 103090684.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.21876651048660278,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 176
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2166.0,
      "completions/mean_length": 627.2902221679688,
      "completions/mean_terminated_length": 568.2315673828125,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 1.6530612244897958,
      "grad_norm": 0.14653432369232178,
      "learning_rate": 1e-06,
      "loss": 0.0105,
      "num_tokens": 103681912.0,
      "reward": 0.551339328289032,
      "reward_std": 0.19617947936058044,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 177
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2545.0,
      "completions/mean_length": 616.9732666015625,
      "completions/mean_terminated_length": 545.6492309570312,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 1.6623906705539357,
      "grad_norm": 0.1253194659948349,
      "learning_rate": 1e-06,
      "loss": 0.0071,
      "num_tokens": 104244448.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.19009242951869965,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 178
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2242.0,
      "completions/mean_length": 634.7545166015625,
      "completions/mean_terminated_length": 539.4907836914062,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 1.6717201166180757,
      "grad_norm": 0.12943314015865326,
      "learning_rate": 1e-06,
      "loss": -0.0054,
      "num_tokens": 104797068.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.19813157618045807,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924395978450775,
      "step": 179
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3817.0,
      "completions/mean_length": 629.8092041015625,
      "completions/mean_terminated_length": 534.4093627929688,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 1.6810495626822157,
      "grad_norm": 0.12720946967601776,
      "learning_rate": 1e-06,
      "loss": -0.0101,
      "num_tokens": 105349977.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.18771639466285706,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 180
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4078.0,
      "completions/mean_length": 647.8828125,
      "completions/mean_terminated_length": 557.0390014648438,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 1.6903790087463557,
      "grad_norm": 0.13440246880054474,
      "learning_rate": 1e-06,
      "loss": 0.0071,
      "num_tokens": 105919120.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.19730742275714874,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199838399887085,
      "step": 181
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3583.0,
      "completions/mean_length": 728.0826416015625,
      "completions/mean_terminated_length": 619.4400634765625,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 1.6997084548104957,
      "grad_norm": 0.11702921986579895,
      "learning_rate": 1e-06,
      "loss": 0.0027,
      "num_tokens": 106561298.0,
      "reward": 0.5055803656578064,
      "reward_std": 0.17870266735553741,
      "rewards/verify_math_reward/mean": 0.5055803656578064,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 182
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3089.0,
      "completions/mean_length": 538.7433471679688,
      "completions/mean_terminated_length": 498.59368896484375,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 1.7090379008746357,
      "grad_norm": 0.15399344265460968,
      "learning_rate": 1e-06,
      "loss": 0.0199,
      "num_tokens": 107098828.0,
      "reward": 0.637276828289032,
      "reward_std": 0.20012785494327545,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 183
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3040.0,
      "completions/mean_length": 675.8214721679688,
      "completions/mean_terminated_length": 589.72998046875,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 1.7183673469387755,
      "grad_norm": 0.13055773079395294,
      "learning_rate": 1e-06,
      "loss": -0.0042,
      "num_tokens": 107707100.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.1916026771068573,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 184
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2774.0,
      "completions/mean_length": 665.2221069335938,
      "completions/mean_terminated_length": 594.8872680664062,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 1.7276967930029155,
      "grad_norm": 0.10972083359956741,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 108315195.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.1711532026529312,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317117214203,
      "step": 185
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2784.0,
      "completions/mean_length": 629.7578125,
      "completions/mean_terminated_length": 570.7412109375,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 1.7370262390670554,
      "grad_norm": 0.13642588257789612,
      "learning_rate": 1e-06,
      "loss": 0.0118,
      "num_tokens": 108909866.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.21319982409477234,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 186
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2607.0,
      "completions/mean_length": 567.2142944335938,
      "completions/mean_terminated_length": 519.312255859375,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 1.7463556851311952,
      "grad_norm": 0.13879670202732086,
      "learning_rate": 1e-06,
      "loss": 0.0015,
      "num_tokens": 109456682.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.18118861317634583,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179922461509705,
      "step": 187
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3396.0,
      "completions/mean_length": 672.8705444335938,
      "completions/mean_terminated_length": 606.6666259765625,
      "completions/min_length": 97.0,
      "completions/min_terminated_length": 97.0,
      "epoch": 1.7556851311953352,
      "grad_norm": 0.12180299311876297,
      "learning_rate": 1e-06,
      "loss": -0.0007,
      "num_tokens": 110080958.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.1836727410554886,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263837933540344,
      "step": 188
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3343.0,
      "completions/mean_length": 677.661865234375,
      "completions/mean_terminated_length": 571.453369140625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 1.7650145772594752,
      "grad_norm": 0.15158618986606598,
      "learning_rate": 1e-06,
      "loss": -0.0238,
      "num_tokens": 110674239.0,
      "reward": 0.609375,
      "reward_std": 0.20200437307357788,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 189
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3283.0,
      "completions/mean_length": 710.3270263671875,
      "completions/mean_terminated_length": 605.1334838867188,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 1.7743440233236152,
      "grad_norm": 0.12813588976860046,
      "learning_rate": 1e-06,
      "loss": 0.0102,
      "num_tokens": 111284884.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.2077547013759613,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 190
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3707.0,
      "completions/mean_length": 588.7355346679688,
      "completions/mean_terminated_length": 537.0996704101562,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 1.7836734693877552,
      "grad_norm": 0.12251389771699905,
      "learning_rate": 1e-06,
      "loss": -0.0182,
      "num_tokens": 111849495.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.14789608120918274,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.48468026518821716,
      "step": 191
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2250.0,
      "completions/mean_length": 640.328125,
      "completions/mean_terminated_length": 565.4617919921875,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 1.7930029154518952,
      "grad_norm": 0.14359082281589508,
      "learning_rate": 1e-06,
      "loss": 0.0146,
      "num_tokens": 112429053.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.21830935776233673,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 192
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4041.0,
      "completions/mean_length": 700.6975708007812,
      "completions/mean_terminated_length": 607.2488403320312,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 1.802332361516035,
      "grad_norm": 0.1354796439409256,
      "learning_rate": 1e-06,
      "loss": -0.0025,
      "num_tokens": 113051070.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.16180570423603058,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 193
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4077.0,
      "completions/mean_length": 636.5826416015625,
      "completions/mean_terminated_length": 565.66064453125,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 1.811661807580175,
      "grad_norm": 0.13040165603160858,
      "learning_rate": 1e-06,
      "loss": 0.0081,
      "num_tokens": 113633176.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.18179410696029663,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 194
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3589.0,
      "completions/mean_length": 594.7522583007812,
      "completions/mean_terminated_length": 547.2239990234375,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 1.820991253644315,
      "grad_norm": 0.13067464530467987,
      "learning_rate": 1e-06,
      "loss": -0.0096,
      "num_tokens": 114201306.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.1868947446346283,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161845445632935,
      "step": 195
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3883.0,
      "completions/mean_length": 653.5803833007812,
      "completions/mean_terminated_length": 579.0010986328125,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 1.8303206997084547,
      "grad_norm": 0.12206802517175674,
      "learning_rate": 1e-06,
      "loss": -0.0098,
      "num_tokens": 114791962.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.19723325967788696,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 196
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2613.0,
      "completions/mean_length": 739.4866333007812,
      "completions/mean_terminated_length": 635.1990966796875,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 1.8396501457725947,
      "grad_norm": 0.12905940413475037,
      "learning_rate": 1e-06,
      "loss": -0.0063,
      "num_tokens": 115444758.0,
      "reward": 0.4520089626312256,
      "reward_std": 0.2306734323501587,
      "rewards/verify_math_reward/mean": 0.4520089328289032,
      "rewards/verify_math_reward/std": 0.49796947836875916,
      "step": 197
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3602.0,
      "completions/mean_length": 688.4989013671875,
      "completions/mean_terminated_length": 566.3803100585938,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 1.8489795918367347,
      "grad_norm": 0.14780296385288239,
      "learning_rate": 1e-06,
      "loss": 0.0027,
      "num_tokens": 116030213.0,
      "reward": 0.5234375,
      "reward_std": 0.19797931611537933,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 198
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3771.0,
      "completions/mean_length": 653.6272583007812,
      "completions/mean_terminated_length": 550.751708984375,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 1.8583090379008746,
      "grad_norm": 0.12445133179426193,
      "learning_rate": 1e-06,
      "loss": 0.0057,
      "num_tokens": 116589935.0,
      "reward": 0.566964328289032,
      "reward_std": 0.17103557288646698,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 199
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3527.0,
      "completions/mean_length": 665.0982666015625,
      "completions/mean_terminated_length": 578.73681640625,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 1.8676384839650146,
      "grad_norm": 0.11815723031759262,
      "learning_rate": 1e-06,
      "loss": 0.0007,
      "num_tokens": 117187143.0,
      "reward": 0.637276828289032,
      "reward_std": 0.15263791382312775,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 200
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3769.0,
      "completions/mean_length": 669.4285888671875,
      "completions/mean_terminated_length": 558.8939819335938,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 1.8769679300291546,
      "grad_norm": 0.14831843972206116,
      "learning_rate": 1e-06,
      "loss": 0.0008,
      "num_tokens": 117752879.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.2200452983379364,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 201
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4032.0,
      "completions/mean_length": 737.7188110351562,
      "completions/mean_terminated_length": 629.3870849609375,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 1.8862973760932946,
      "grad_norm": 0.12336394190788269,
      "learning_rate": 1e-06,
      "loss": -0.0024,
      "num_tokens": 118379267.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.20985834300518036,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 202
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2685.0,
      "completions/mean_length": 672.3348388671875,
      "completions/mean_terminated_length": 578.10546875,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 1.8956268221574344,
      "grad_norm": 0.1384689062833786,
      "learning_rate": 1e-06,
      "loss": 0.0078,
      "num_tokens": 118975167.0,
      "reward": 0.5546875,
      "reward_std": 0.21891483664512634,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 203
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3466.0,
      "completions/mean_length": 637.8761596679688,
      "completions/mean_terminated_length": 598.8453979492188,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 1.9049562682215744,
      "grad_norm": 0.137050598859787,
      "learning_rate": 1e-06,
      "loss": 0.0073,
      "num_tokens": 119602696.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.21756118535995483,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 204
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2670.0,
      "completions/mean_length": 690.0234985351562,
      "completions/mean_terminated_length": 596.2809448242188,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 1.9142857142857141,
      "grad_norm": 0.11969118565320969,
      "learning_rate": 1e-06,
      "loss": 0.0068,
      "num_tokens": 120208469.0,
      "reward": 0.5234375,
      "reward_std": 0.1738969385623932,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 205
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3551.0,
      "completions/mean_length": 695.3069458007812,
      "completions/mean_terminated_length": 637.4063720703125,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 1.9236151603498541,
      "grad_norm": 0.12369006127119064,
      "learning_rate": 1e-06,
      "loss": 0.0218,
      "num_tokens": 120849648.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.18701057136058807,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 206
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4088.0,
      "completions/mean_length": 658.7913208007812,
      "completions/mean_terminated_length": 588.3246459960938,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 1.9329446064139941,
      "grad_norm": 0.13501164317131042,
      "learning_rate": 1e-06,
      "loss": -0.0006,
      "num_tokens": 121459645.0,
      "reward": 0.613839328289032,
      "reward_std": 0.18873807787895203,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 207
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4045.0,
      "completions/mean_length": 699.2801513671875,
      "completions/mean_terminated_length": 641.447265625,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 1.9422740524781341,
      "grad_norm": 0.13782964646816254,
      "learning_rate": 1e-06,
      "loss": 0.0111,
      "num_tokens": 122115752.0,
      "reward": 0.5457589626312256,
      "reward_std": 0.2335277497768402,
      "rewards/verify_math_reward/mean": 0.5457589030265808,
      "rewards/verify_math_reward/std": 0.4981797933578491,
      "step": 208
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4076.0,
      "completions/mean_length": 584.8817138671875,
      "completions/mean_terminated_length": 525.10107421875,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 1.951603498542274,
      "grad_norm": 0.1312982738018036,
      "learning_rate": 1e-06,
      "loss": 0.01,
      "num_tokens": 122663590.0,
      "reward": 0.6595982313156128,
      "reward_std": 0.17908497154712677,
      "rewards/verify_math_reward/mean": 0.6595982313156128,
      "rewards/verify_math_reward/std": 0.4741089344024658,
      "step": 209
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3579.0,
      "completions/mean_length": 621.958740234375,
      "completions/mean_terminated_length": 550.7369384765625,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 1.960932944606414,
      "grad_norm": 0.1384764015674591,
      "learning_rate": 1e-06,
      "loss": 0.0076,
      "num_tokens": 123229761.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.16187915205955505,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.4664256274700165,
      "step": 210
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2659.0,
      "completions/mean_length": 607.6451416015625,
      "completions/mean_terminated_length": 568.2731323242188,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 1.970262390670554,
      "grad_norm": 0.13262683153152466,
      "learning_rate": 1e-06,
      "loss": 0.0041,
      "num_tokens": 123823171.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.20038609206676483,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 211
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3672.0,
      "completions/mean_length": 634.1652221679688,
      "completions/mean_terminated_length": 534.8013916015625,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 1.9795918367346939,
      "grad_norm": 0.12873639166355133,
      "learning_rate": 1e-06,
      "loss": -0.0015,
      "num_tokens": 124373519.0,
      "reward": 0.6171875,
      "reward_std": 0.18344198167324066,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 212
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2938.0,
      "completions/mean_length": 606.8125,
      "completions/mean_terminated_length": 531.2200317382812,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 1.9889212827988338,
      "grad_norm": 0.12236711382865906,
      "learning_rate": 1e-06,
      "loss": 0.0205,
      "num_tokens": 124932151.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.16930918395519257,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 213
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.014204545454545414,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3598.0,
      "completions/mean_length": 618.5653686523438,
      "completions/mean_terminated_length": 568.4581909179688,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 1.9982507288629736,
      "grad_norm": 0.11970631778240204,
      "learning_rate": 1e-06,
      "loss": 0.0103,
      "num_tokens": 125525642.0,
      "reward": 0.6484375,
      "reward_std": 0.1632571667432785,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 214
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3812.0,
      "completions/mean_length": 659.1395263671875,
      "completions/mean_terminated_length": 564.5469970703125,
      "completions/min_length": 81.0,
      "completions/min_terminated_length": 81.0,
      "epoch": 2.00932944606414,
      "grad_norm": 0.13896454870700836,
      "learning_rate": 1e-06,
      "loss": -0.0104,
      "num_tokens": 126106415.0,
      "reward": 0.606026828289032,
      "reward_std": 0.2027532458305359,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 215
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3344.0,
      "completions/mean_length": 714.7076416015625,
      "completions/mean_terminated_length": 585.4113159179688,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 2.01865889212828,
      "grad_norm": 0.1265566051006317,
      "learning_rate": 1e-06,
      "loss": -0.0014,
      "num_tokens": 126701305.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.17724093794822693,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 216
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3856.0,
      "completions/mean_length": 676.4375,
      "completions/mean_terminated_length": 557.9769287109375,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 2.02798833819242,
      "grad_norm": 0.1350981891155243,
      "learning_rate": 1e-06,
      "loss": 0.0013,
      "num_tokens": 127278209.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.1869703084230423,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924396276473999,
      "step": 217
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3798.0,
      "completions/mean_length": 707.5971069335938,
      "completions/mean_terminated_length": 634.1881103515625,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 2.03731778425656,
      "grad_norm": 0.12518104910850525,
      "learning_rate": 1e-06,
      "loss": -0.001,
      "num_tokens": 127925704.0,
      "reward": 0.546875,
      "reward_std": 0.18457287549972534,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 218
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3915.0,
      "completions/mean_length": 621.2767944335938,
      "completions/mean_terminated_length": 521.5430297851562,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 2.0466472303206995,
      "grad_norm": 0.1590966433286667,
      "learning_rate": 1e-06,
      "loss": 0.0036,
      "num_tokens": 128467312.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.2203381508588791,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 219
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3369.0,
      "completions/mean_length": 749.3795166015625,
      "completions/mean_terminated_length": 613.3379516601562,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 2.0559766763848395,
      "grad_norm": 0.13837730884552002,
      "learning_rate": 1e-06,
      "loss": 0.001,
      "num_tokens": 129089932.0,
      "reward": 0.551339328289032,
      "reward_std": 0.23375487327575684,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 220
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2578.0,
      "completions/mean_length": 677.390625,
      "completions/mean_terminated_length": 575.2252807617188,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 2.0653061224489795,
      "grad_norm": 0.11362986266613007,
      "learning_rate": 1e-06,
      "loss": -0.0022,
      "num_tokens": 129678290.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.15895067155361176,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 221
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3582.0,
      "completions/mean_length": 685.7377319335938,
      "completions/mean_terminated_length": 607.8778076171875,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 2.0746355685131195,
      "grad_norm": 0.13333557546138763,
      "learning_rate": 1e-06,
      "loss": 0.0065,
      "num_tokens": 130302439.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.18701240420341492,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161845445632935,
      "step": 222
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2646.0,
      "completions/mean_length": 625.0670166015625,
      "completions/mean_terminated_length": 541.7645874023438,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 2.0839650145772595,
      "grad_norm": 0.14015834033489227,
      "learning_rate": 1e-06,
      "loss": -0.0212,
      "num_tokens": 130877603.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.16661031544208527,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 223
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2983.0,
      "completions/mean_length": 700.3861694335938,
      "completions/mean_terminated_length": 570.5422973632812,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 2.0932944606413995,
      "grad_norm": 0.14076173305511475,
      "learning_rate": 1e-06,
      "loss": -0.0149,
      "num_tokens": 131455685.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.1885526329278946,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 224
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0323660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2877.0,
      "completions/mean_length": 722.5111694335938,
      "completions/mean_terminated_length": 609.6724243164062,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 2.1026239067055394,
      "grad_norm": 0.13184206187725067,
      "learning_rate": 1e-06,
      "loss": 0.0054,
      "num_tokens": 132082679.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.2120707631111145,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 225
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4046.0,
      "completions/mean_length": 702.5938110351562,
      "completions/mean_terminated_length": 629.0763549804688,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 2.1119533527696794,
      "grad_norm": 0.1343090981245041,
      "learning_rate": 1e-06,
      "loss": 0.0128,
      "num_tokens": 132726227.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.19554010033607483,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 226
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2959.0,
      "completions/mean_length": 628.0960083007812,
      "completions/mean_terminated_length": 552.9646606445312,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 2.1212827988338194,
      "grad_norm": 0.1360645443201065,
      "learning_rate": 1e-06,
      "loss": 0.0035,
      "num_tokens": 133293929.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.18118861317634583,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 227
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1966.0,
      "completions/mean_length": 652.671875,
      "completions/mean_terminated_length": 574.0570678710938,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 2.130612244897959,
      "grad_norm": 0.13314826786518097,
      "learning_rate": 1e-06,
      "loss": 0.0052,
      "num_tokens": 133885211.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.20380564033985138,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159480571747,
      "step": 228
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3639.0,
      "completions/mean_length": 776.6741333007812,
      "completions/mean_terminated_length": 657.715576171875,
      "completions/min_length": 205.0,
      "completions/min_terminated_length": 205.0,
      "epoch": 2.139941690962099,
      "grad_norm": 0.11701515316963196,
      "learning_rate": 1e-06,
      "loss": 0.0059,
      "num_tokens": 134549071.0,
      "reward": 0.574776828289032,
      "reward_std": 0.17813995480537415,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 229
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3908.0,
      "completions/mean_length": 645.9732666015625,
      "completions/mean_terminated_length": 538.7802124023438,
      "completions/min_length": 94.0,
      "completions/min_terminated_length": 94.0,
      "epoch": 2.149271137026239,
      "grad_norm": 0.14322766661643982,
      "learning_rate": 1e-06,
      "loss": -0.0087,
      "num_tokens": 135102863.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.1775447428226471,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.482613742351532,
      "step": 230
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3102.0,
      "completions/mean_length": 658.2980346679688,
      "completions/mean_terminated_length": 567.7285766601562,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 2.158600583090379,
      "grad_norm": 0.13096362352371216,
      "learning_rate": 1e-06,
      "loss": -0.0072,
      "num_tokens": 135688226.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.16340941190719604,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 231
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0435267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3176.0,
      "completions/mean_length": 758.7299194335938,
      "completions/mean_terminated_length": 606.8588256835938,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 2.167930029154519,
      "grad_norm": 0.12270905077457428,
      "learning_rate": 1e-06,
      "loss": -0.0064,
      "num_tokens": 136298680.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.16604506969451904,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 232
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3398.0,
      "completions/mean_length": 720.1719360351562,
      "completions/mean_terminated_length": 615.2842407226562,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 2.177259475218659,
      "grad_norm": 0.12233763188123703,
      "learning_rate": 1e-06,
      "loss": 0.0078,
      "num_tokens": 136924626.0,
      "reward": 0.578125,
      "reward_std": 0.17878004908561707,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 233
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3202.0,
      "completions/mean_length": 639.75,
      "completions/mean_terminated_length": 556.7999877929688,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 2.186588921282799,
      "grad_norm": 0.13310506939888,
      "learning_rate": 1e-06,
      "loss": -0.0099,
      "num_tokens": 137519090.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.2103782743215561,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 234
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2326.0,
      "completions/mean_length": 583.7109375,
      "completions/mean_terminated_length": 536.0328369140625,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 2.195918367346939,
      "grad_norm": 0.13909703493118286,
      "learning_rate": 1e-06,
      "loss": 0.0191,
      "num_tokens": 138080239.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.189639613032341,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 235
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3518.0,
      "completions/mean_length": 615.6217041015625,
      "completions/mean_terminated_length": 556.3643798828125,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 2.205247813411079,
      "grad_norm": 0.133534237742424,
      "learning_rate": 1e-06,
      "loss": -0.0069,
      "num_tokens": 138659828.0,
      "reward": 0.6328125,
      "reward_std": 0.18250201642513275,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 236
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3193.0,
      "completions/mean_length": 655.09375,
      "completions/mean_terminated_length": 584.55126953125,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 2.2145772594752184,
      "grad_norm": 0.13340818881988525,
      "learning_rate": 1e-06,
      "loss": 0.0142,
      "num_tokens": 139256472.0,
      "reward": 0.578125,
      "reward_std": 0.21609443426132202,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 237
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3982.0,
      "completions/mean_length": 655.091552734375,
      "completions/mean_terminated_length": 560.3875732421875,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 2.2239067055393584,
      "grad_norm": 0.11444026231765747,
      "learning_rate": 1e-06,
      "loss": 0.0053,
      "num_tokens": 139826866.0,
      "reward": 0.5703125,
      "reward_std": 0.1689378321170807,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 238
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2182.0,
      "completions/mean_length": 667.599365234375,
      "completions/mean_terminated_length": 548.8325805664062,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 2.2332361516034984,
      "grad_norm": 0.14029905200004578,
      "learning_rate": 1e-06,
      "loss": 0.0062,
      "num_tokens": 140393347.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.1975356638431549,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791128396987915,
      "step": 239
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0323660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3994.0,
      "completions/mean_length": 680.1283569335938,
      "completions/mean_terminated_length": 565.8719482421875,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 2.2425655976676384,
      "grad_norm": 0.11632180213928223,
      "learning_rate": 1e-06,
      "loss": 0.004,
      "num_tokens": 140959102.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.15634779632091522,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 240
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2092.0,
      "completions/mean_length": 658.4642944335938,
      "completions/mean_terminated_length": 555.7333374023438,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 2.2518950437317784,
      "grad_norm": 0.14235104620456696,
      "learning_rate": 1e-06,
      "loss": 0.0108,
      "num_tokens": 141531070.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.18994523584842682,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.4876568913459778,
      "step": 241
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3761.0,
      "completions/mean_length": 656.7890625,
      "completions/mean_terminated_length": 558.0745849609375,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 2.2612244897959184,
      "grad_norm": 0.13869857788085938,
      "learning_rate": 1e-06,
      "loss": -0.0119,
      "num_tokens": 142097121.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.19182021915912628,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 242
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0401785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3107.0,
      "completions/mean_length": 686.208740234375,
      "completions/mean_terminated_length": 543.4732666015625,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 2.2705539358600584,
      "grad_norm": 0.14248910546302795,
      "learning_rate": 1e-06,
      "loss": -0.0084,
      "num_tokens": 142650164.0,
      "reward": 0.5770089626312256,
      "reward_std": 0.18378081917762756,
      "rewards/verify_math_reward/mean": 0.5770089030265808,
      "rewards/verify_math_reward/std": 0.4943099319934845,
      "step": 243
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3887.0,
      "completions/mean_length": 624.0346069335938,
      "completions/mean_terminated_length": 503.7586669921875,
      "completions/min_length": 75.0,
      "completions/min_terminated_length": 75.0,
      "epoch": 2.2798833819241984,
      "grad_norm": 0.1280028373003006,
      "learning_rate": 1e-06,
      "loss": -0.0021,
      "num_tokens": 143163483.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.129900723695755,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.4846802353858948,
      "step": 244
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3291.0,
      "completions/mean_length": 694.8717041015625,
      "completions/mean_terminated_length": 572.9815063476562,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 2.2892128279883384,
      "grad_norm": 0.1106211245059967,
      "learning_rate": 1e-06,
      "loss": 0.0024,
      "num_tokens": 143742368.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.13598665595054626,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 245
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2396.0,
      "completions/mean_length": 672.7522583007812,
      "completions/mean_terminated_length": 582.5635986328125,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 2.298542274052478,
      "grad_norm": 0.1433972269296646,
      "learning_rate": 1e-06,
      "loss": 0.0177,
      "num_tokens": 144343938.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.20079974830150604,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 246
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2332.0,
      "completions/mean_length": 622.880615234375,
      "completions/mean_terminated_length": 567.751708984375,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 2.307871720116618,
      "grad_norm": 0.12620839476585388,
      "learning_rate": 1e-06,
      "loss": -0.0013,
      "num_tokens": 144940383.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.15811511874198914,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321892857551575,
      "step": 247
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3649.0,
      "completions/mean_length": 644.1685791015625,
      "completions/mean_terminated_length": 545.0918579101562,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 2.317201166180758,
      "grad_norm": 0.14193283021450043,
      "learning_rate": 1e-06,
      "loss": -0.0003,
      "num_tokens": 145506158.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.19986753165721893,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 248
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3798.0,
      "completions/mean_length": 750.5111694335938,
      "completions/mean_terminated_length": 606.4097900390625,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 2.326530612244898,
      "grad_norm": 0.11254996061325073,
      "learning_rate": 1e-06,
      "loss": -0.0109,
      "num_tokens": 146117720.0,
      "reward": 0.574776828289032,
      "reward_std": 0.14635655283927917,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 249
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3998.0,
      "completions/mean_length": 660.3582763671875,
      "completions/mean_terminated_length": 573.8775634765625,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 2.335860058309038,
      "grad_norm": 0.13078922033309937,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 146702353.0,
      "reward": 0.660714328289032,
      "reward_std": 0.19174326956272125,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 250
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3579.0,
      "completions/mean_length": 678.640625,
      "completions/mean_terminated_length": 547.9652099609375,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 2.345189504373178,
      "grad_norm": 0.13873854279518127,
      "learning_rate": 1e-06,
      "loss": -0.0014,
      "num_tokens": 147267087.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.17719633877277374,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 251
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1744.0,
      "completions/mean_length": 649.4140625,
      "completions/mean_terminated_length": 525.894775390625,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 2.354518950437318,
      "grad_norm": 0.15454351902008057,
      "learning_rate": 1e-06,
      "loss": 0.0021,
      "num_tokens": 147807314.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.186372309923172,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 252
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3047.0,
      "completions/mean_length": 666.1439819335938,
      "completions/mean_terminated_length": 563.6425170898438,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 2.363848396501458,
      "grad_norm": 0.11001133918762207,
      "learning_rate": 1e-06,
      "loss": -0.0008,
      "num_tokens": 148382819.0,
      "reward": 0.652901828289032,
      "reward_std": 0.13531264662742615,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631320357322693,
      "step": 253
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4089.0,
      "completions/mean_length": 704.7913208007812,
      "completions/mean_terminated_length": 619.4290161132812,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 2.373177842565598,
      "grad_norm": 0.12934285402297974,
      "learning_rate": 1e-06,
      "loss": -0.0133,
      "num_tokens": 149006656.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.18768611550331116,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 254
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3474.0,
      "completions/mean_length": 643.083740234375,
      "completions/mean_terminated_length": 543.9758911132812,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 2.3825072886297374,
      "grad_norm": 0.11873723566532135,
      "learning_rate": 1e-06,
      "loss": -0.0023,
      "num_tokens": 149565491.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.1295202374458313,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 255
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0323660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3615.0,
      "completions/mean_length": 709.325927734375,
      "completions/mean_terminated_length": 596.046142578125,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 2.3918367346938774,
      "grad_norm": 0.12077955901622772,
      "learning_rate": 1e-06,
      "loss": -0.0145,
      "num_tokens": 150175959.0,
      "reward": 0.5703125,
      "reward_std": 0.16247829794883728,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 256
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3995.0,
      "completions/mean_length": 744.130615234375,
      "completions/mean_terminated_length": 587.5011596679688,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 2.4011661807580174,
      "grad_norm": 0.14024953544139862,
      "learning_rate": 1e-06,
      "loss": -0.0139,
      "num_tokens": 150767372.0,
      "reward": 0.59375,
      "reward_std": 0.187199667096138,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 257
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2258.0,
      "completions/mean_length": 600.0245971679688,
      "completions/mean_terminated_length": 536.4613647460938,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 2.4104956268221573,
      "grad_norm": 0.14611251652240753,
      "learning_rate": 1e-06,
      "loss": 0.0015,
      "num_tokens": 151324362.0,
      "reward": 0.65625,
      "reward_std": 0.20647519826889038,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 258
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4094.0,
      "completions/mean_length": 626.0949096679688,
      "completions/mean_terminated_length": 546.873291015625,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 2.4198250728862973,
      "grad_norm": 0.13245181739330292,
      "learning_rate": 1e-06,
      "loss": -0.0051,
      "num_tokens": 151894959.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.1780746579170227,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 259
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2869.0,
      "completions/mean_length": 704.2500610351562,
      "completions/mean_terminated_length": 574.5538940429688,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 2.4291545189504373,
      "grad_norm": 0.1365259885787964,
      "learning_rate": 1e-06,
      "loss": -0.006,
      "num_tokens": 152482431.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.20718877017498016,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 260
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2463.0,
      "completions/mean_length": 711.029052734375,
      "completions/mean_terminated_length": 565.2269897460938,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 2.4384839650145773,
      "grad_norm": 0.1361416131258011,
      "learning_rate": 1e-06,
      "loss": -0.0225,
      "num_tokens": 153063241.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.18179228901863098,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 261
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2678.0,
      "completions/mean_length": 657.9765625,
      "completions/mean_terminated_length": 551.156494140625,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 2.4478134110787173,
      "grad_norm": 0.14126794040203094,
      "learning_rate": 1e-06,
      "loss": -0.0006,
      "num_tokens": 153631036.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.19235128164291382,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924395978450775,
      "step": 262
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3345.0,
      "completions/mean_length": 676.4230346679688,
      "completions/mean_terminated_length": 557.9619140625,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 2.4571428571428573,
      "grad_norm": 0.12177924811840057,
      "learning_rate": 1e-06,
      "loss": 0.0084,
      "num_tokens": 154199039.0,
      "reward": 0.637276828289032,
      "reward_std": 0.1572147011756897,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 263
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2730.0,
      "completions/mean_length": 685.5011596679688,
      "completions/mean_terminated_length": 550.9802856445312,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 2.466472303206997,
      "grad_norm": 0.14432471990585327,
      "learning_rate": 1e-06,
      "loss": -0.0026,
      "num_tokens": 154768216.0,
      "reward": 0.598214328289032,
      "reward_std": 0.18051277101039886,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 264
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2733.0,
      "completions/mean_length": 725.1328735351562,
      "completions/mean_terminated_length": 575.84033203125,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 2.4758017492711373,
      "grad_norm": 0.1281932145357132,
      "learning_rate": 1e-06,
      "loss": 0.006,
      "num_tokens": 155351031.0,
      "reward": 0.590401828289032,
      "reward_std": 0.1468462198972702,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 265
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3313.0,
      "completions/mean_length": 671.2645263671875,
      "completions/mean_terminated_length": 548.5283203125,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 2.485131195335277,
      "grad_norm": 0.12536077201366425,
      "learning_rate": 1e-06,
      "loss": 0.0066,
      "num_tokens": 155908852.0,
      "reward": 0.660714328289032,
      "reward_std": 0.1725817173719406,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 266
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3356.0,
      "completions/mean_length": 637.2098388671875,
      "completions/mean_terminated_length": 574.3226928710938,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 2.494460641399417,
      "grad_norm": 0.13122303783893585,
      "learning_rate": 1e-06,
      "loss": 0.0074,
      "num_tokens": 156500464.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.1717965006828308,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 267
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4065.0,
      "completions/mean_length": 856.1016235351562,
      "completions/mean_terminated_length": 631.8604125976562,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 2.503790087463557,
      "grad_norm": 0.12117322534322739,
      "learning_rate": 1e-06,
      "loss": -0.015,
      "num_tokens": 157128939.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.1734090894460678,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761127948761,
      "step": 268
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3355.0,
      "completions/mean_length": 662.0614013671875,
      "completions/mean_terminated_length": 567.54931640625,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 2.513119533527697,
      "grad_norm": 0.1250961571931839,
      "learning_rate": 1e-06,
      "loss": -0.0073,
      "num_tokens": 157708682.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.16825860738754272,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.48468026518821716,
      "step": 269
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2995.0,
      "completions/mean_length": 689.9475708007812,
      "completions/mean_terminated_length": 580.0748901367188,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 2.522448979591837,
      "grad_norm": 0.125711128115654,
      "learning_rate": 1e-06,
      "loss": -0.0008,
      "num_tokens": 158305963.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.17355993390083313,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 270
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3048.0,
      "completions/mean_length": 759.0469360351562,
      "completions/mean_terminated_length": 631.4461059570312,
      "completions/min_length": 181.0,
      "completions/min_terminated_length": 181.0,
      "epoch": 2.5317784256559768,
      "grad_norm": 0.13090497255325317,
      "learning_rate": 1e-06,
      "loss": 0.0045,
      "num_tokens": 158961901.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.2081337720155716,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 271
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2411.0,
      "completions/mean_length": 663.7991333007812,
      "completions/mean_terminated_length": 553.0829467773438,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 2.5411078717201168,
      "grad_norm": 0.12194574624300003,
      "learning_rate": 1e-06,
      "loss": -0.0086,
      "num_tokens": 159537065.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.14553029835224152,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 272
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3856.0,
      "completions/mean_length": 683.8850708007812,
      "completions/mean_terminated_length": 577.8699951171875,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 2.5504373177842563,
      "grad_norm": 0.13890667259693146,
      "learning_rate": 1e-06,
      "loss": 0.0009,
      "num_tokens": 160121370.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.22105351090431213,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 273
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3950.0,
      "completions/mean_length": 850.3705444335938,
      "completions/mean_terminated_length": 658.5484619140625,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 2.5597667638483967,
      "grad_norm": 0.13573136925697327,
      "learning_rate": 1e-06,
      "loss": -0.0225,
      "num_tokens": 160763310.0,
      "reward": 0.5457589626312256,
      "reward_std": 0.20929309725761414,
      "rewards/verify_math_reward/mean": 0.5457589030265808,
      "rewards/verify_math_reward/std": 0.4981798231601715,
      "step": 274
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.036830357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2766.0,
      "completions/mean_length": 709.935302734375,
      "completions/mean_terminated_length": 580.45654296875,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 2.5690962099125363,
      "grad_norm": 0.14654983580112457,
      "learning_rate": 1e-06,
      "loss": 0.0005,
      "num_tokens": 161351324.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.21263712644577026,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.49702703952789307,
      "step": 275
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0323660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4027.0,
      "completions/mean_length": 710.7522583007812,
      "completions/mean_terminated_length": 597.5201416015625,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 2.5784256559766763,
      "grad_norm": 0.1251104772090912,
      "learning_rate": 1e-06,
      "loss": -0.0156,
      "num_tokens": 161966646.0,
      "reward": 0.629464328289032,
      "reward_std": 0.16311588883399963,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 276
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3229.0,
      "completions/mean_length": 686.9375610351562,
      "completions/mean_terminated_length": 568.8406372070312,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 2.5877551020408163,
      "grad_norm": 0.12175661325454712,
      "learning_rate": 1e-06,
      "loss": 0.0034,
      "num_tokens": 162546198.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.14545612037181854,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 277
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3774.0,
      "completions/mean_length": 721.3504638671875,
      "completions/mean_terminated_length": 600.4092407226562,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 2.5970845481049563,
      "grad_norm": 0.12625743448734283,
      "learning_rate": 1e-06,
      "loss": -0.0029,
      "num_tokens": 163158144.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.19624477624893188,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973994791507721,
      "step": 278
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2079.0,
      "completions/mean_length": 699.4453735351562,
      "completions/mean_terminated_length": 553.1443481445312,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 2.6064139941690962,
      "grad_norm": 0.14673063158988953,
      "learning_rate": 1e-06,
      "loss": -0.0063,
      "num_tokens": 163720319.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.1889663189649582,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 279
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3980.0,
      "completions/mean_length": 682.8538208007812,
      "completions/mean_terminated_length": 564.615478515625,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 2.6157434402332362,
      "grad_norm": 0.1423073261976242,
      "learning_rate": 1e-06,
      "loss": 0.0072,
      "num_tokens": 164293660.0,
      "reward": 0.59375,
      "reward_std": 0.18821631371974945,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 280
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3634.0,
      "completions/mean_length": 622.4832763671875,
      "completions/mean_terminated_length": 518.677001953125,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 2.6250728862973762,
      "grad_norm": 0.13733068108558655,
      "learning_rate": 1e-06,
      "loss": -0.002,
      "num_tokens": 164825829.0,
      "reward": 0.684151828289032,
      "reward_std": 0.17292234301567078,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 281
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3279.0,
      "completions/mean_length": 651.2421875,
      "completions/mean_terminated_length": 572.5947265625,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 2.6344023323615158,
      "grad_norm": 0.134793221950531,
      "learning_rate": 1e-06,
      "loss": -0.0025,
      "num_tokens": 165412086.0,
      "reward": 0.6986607313156128,
      "reward_std": 0.17134006321430206,
      "rewards/verify_math_reward/mean": 0.6986607313156128,
      "rewards/verify_math_reward/std": 0.4590960144996643,
      "step": 282
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3373.0,
      "completions/mean_length": 703.3582763671875,
      "completions/mean_terminated_length": 557.225830078125,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 2.643731778425656,
      "grad_norm": 0.14702339470386505,
      "learning_rate": 1e-06,
      "loss": -0.0083,
      "num_tokens": 165971255.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.19043126702308655,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 283
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3780.0,
      "completions/mean_length": 627.9967041015625,
      "completions/mean_terminated_length": 548.8184814453125,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 2.6530612244897958,
      "grad_norm": 0.14755719900131226,
      "learning_rate": 1e-06,
      "loss": 0.0125,
      "num_tokens": 166547276.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.2054309993982315,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 284
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.046875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3496.0,
      "completions/mean_length": 752.3772583007812,
      "completions/mean_terminated_length": 587.936767578125,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 2.6623906705539357,
      "grad_norm": 0.1372850388288498,
      "learning_rate": 1e-06,
      "loss": -0.0177,
      "num_tokens": 167138942.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.1795377880334854,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 285
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.049107142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3962.0,
      "completions/mean_length": 834.3270263671875,
      "completions/mean_terminated_length": 665.8837890625,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 2.6717201166180757,
      "grad_norm": 0.11591921746730804,
      "learning_rate": 1e-06,
      "loss": -0.0029,
      "num_tokens": 167795235.0,
      "reward": 0.4966517984867096,
      "reward_std": 0.18193678557872772,
      "rewards/verify_math_reward/mean": 0.4966517984867096,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 286
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3330.0,
      "completions/mean_length": 740.8438110351562,
      "completions/mean_terminated_length": 608.5057983398438,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 2.6810495626822157,
      "grad_norm": 0.1284467726945877,
      "learning_rate": 1e-06,
      "loss": -0.0119,
      "num_tokens": 168414383.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.19166797399520874,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 287
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3599.0,
      "completions/mean_length": 717.2232666015625,
      "completions/mean_terminated_length": 596.1340942382812,
      "completions/min_length": 93.0,
      "completions/min_terminated_length": 93.0,
      "epoch": 2.6903790087463557,
      "grad_norm": 0.12711331248283386,
      "learning_rate": 1e-06,
      "loss": -0.0121,
      "num_tokens": 169014887.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.1764167845249176,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761127948761,
      "step": 288
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2618.0,
      "completions/mean_length": 737.8783569335938,
      "completions/mean_terminated_length": 601.3693237304688,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 2.6997084548104957,
      "grad_norm": 0.12846635282039642,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 169616954.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.19043126702308655,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.49828118085861206,
      "step": 289
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3065.0,
      "completions/mean_length": 672.2310791015625,
      "completions/mean_terminated_length": 561.786865234375,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 2.7090379008746357,
      "grad_norm": 0.143126979470253,
      "learning_rate": 1e-06,
      "loss": 0.0022,
      "num_tokens": 170192033.0,
      "reward": 0.6328125,
      "reward_std": 0.20628906786441803,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 290
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2952.0,
      "completions/mean_length": 676.6707763671875,
      "completions/mean_terminated_length": 578.5269775390625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 2.7183673469387752,
      "grad_norm": 0.13283273577690125,
      "learning_rate": 1e-06,
      "loss": -0.0192,
      "num_tokens": 170786226.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.16361257433891296,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 291
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3502.0,
      "completions/mean_length": 749.4386596679688,
      "completions/mean_terminated_length": 613.3995361328125,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 2.7276967930029157,
      "grad_norm": 0.13216552138328552,
      "learning_rate": 1e-06,
      "loss": -0.0158,
      "num_tokens": 171416731.0,
      "reward": 0.5546875,
      "reward_std": 0.19084130227565765,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 292
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0513392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3361.0,
      "completions/mean_length": 826.5346069335938,
      "completions/mean_terminated_length": 649.5988159179688,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 2.7370262390670552,
      "grad_norm": 0.13431333005428314,
      "learning_rate": 1e-06,
      "loss": -0.0139,
      "num_tokens": 172059850.0,
      "reward": 0.578125,
      "reward_std": 0.21409814059734344,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 293
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4036.0,
      "completions/mean_length": 732.0123291015625,
      "completions/mean_terminated_length": 595.2647705078125,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 2.746355685131195,
      "grad_norm": 0.13596613705158234,
      "learning_rate": 1e-06,
      "loss": 0.0014,
      "num_tokens": 172673205.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.1773567646741867,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 294
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3955.0,
      "completions/mean_length": 710.5457763671875,
      "completions/mean_terminated_length": 589.2173461914062,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 2.755685131195335,
      "grad_norm": 0.13402144610881805,
      "learning_rate": 1e-06,
      "loss": -0.0041,
      "num_tokens": 173268798.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.1869696080684662,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 295
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3040.0,
      "completions/mean_length": 715.3951416015625,
      "completions/mean_terminated_length": 598.2840576171875,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 2.765014577259475,
      "grad_norm": 0.13711683452129364,
      "learning_rate": 1e-06,
      "loss": 0.0119,
      "num_tokens": 173875576.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.18092577159404755,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 296
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4085.0,
      "completions/mean_length": 650.9855346679688,
      "completions/mean_terminated_length": 588.3488159179688,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 2.774344023323615,
      "grad_norm": 0.1356029063463211,
      "learning_rate": 1e-06,
      "loss": 0.0124,
      "num_tokens": 174481611.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.19230668246746063,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 297
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3929.0,
      "completions/mean_length": 730.5736694335938,
      "completions/mean_terminated_length": 613.9884643554688,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 2.783673469387755,
      "grad_norm": 0.12608329951763153,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 175105597.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.1888921558856964,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 298
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3211.0,
      "completions/mean_length": 690.0904541015625,
      "completions/mean_terminated_length": 596.3497314453125,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 2.793002915451895,
      "grad_norm": 0.1373504251241684,
      "learning_rate": 1e-06,
      "loss": -0.0018,
      "num_tokens": 175713446.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.19411678612232208,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 299
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3949.0,
      "completions/mean_length": 787.927490234375,
      "completions/mean_terminated_length": 633.3446044921875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 2.8023323615160347,
      "grad_norm": 0.13010147213935852,
      "learning_rate": 1e-06,
      "loss": -0.0057,
      "num_tokens": 176339549.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.197828471660614,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.4884119927883148,
      "step": 300
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3866.0,
      "completions/mean_length": 710.0156860351562,
      "completions/mean_terminated_length": 584.6088256835938,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 2.811661807580175,
      "grad_norm": 0.13526295125484467,
      "learning_rate": 1e-06,
      "loss": -0.0009,
      "num_tokens": 176925299.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.17400822043418884,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 301
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0457589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2630.0,
      "completions/mean_length": 766.9464721679688,
      "completions/mean_terminated_length": 607.3076171875,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 2.8209912536443147,
      "grad_norm": 0.13222624361515045,
      "learning_rate": 1e-06,
      "loss": 0.0022,
      "num_tokens": 177528235.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.20474882423877716,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 302
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0457589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3639.0,
      "completions/mean_length": 815.3839721679688,
      "completions/mean_terminated_length": 658.0678100585938,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 2.8303206997084547,
      "grad_norm": 0.13167433440685272,
      "learning_rate": 1e-06,
      "loss": 0.004,
      "num_tokens": 178180747.0,
      "reward": 0.574776828289032,
      "reward_std": 0.18655845522880554,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 303
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.052455357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2785.0,
      "completions/mean_length": 772.6763916015625,
      "completions/mean_terminated_length": 588.6996459960938,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 2.8396501457725947,
      "grad_norm": 0.13399431109428406,
      "learning_rate": 1e-06,
      "loss": -0.0236,
      "num_tokens": 178771577.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.20369574427604675,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111123085022,
      "step": 304
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0513392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4059.0,
      "completions/mean_length": 773.997802734375,
      "completions/mean_terminated_length": 594.2188110351562,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 2.8489795918367347,
      "grad_norm": 0.13628385961055756,
      "learning_rate": 1e-06,
      "loss": -0.0091,
      "num_tokens": 179375927.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.18911674618721008,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 305
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.052455357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4093.0,
      "completions/mean_length": 757.5892944335938,
      "completions/mean_terminated_length": 572.7774047851562,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 2.8583090379008746,
      "grad_norm": 0.1519719660282135,
      "learning_rate": 1e-06,
      "loss": -0.0135,
      "num_tokens": 179945351.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.22432535886764526,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791125416755676,
      "step": 306
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3822.0,
      "completions/mean_length": 735.1261596679688,
      "completions/mean_terminated_length": 598.5051879882812,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 2.8676384839650146,
      "grad_norm": 0.12454867362976074,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 180544224.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.16111847758293152,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865824937820435,
      "step": 307
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2904.0,
      "completions/mean_length": 703.9564819335938,
      "completions/mean_terminated_length": 586.4491577148438,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 2.8769679300291546,
      "grad_norm": 0.1262216717004776,
      "learning_rate": 1e-06,
      "loss": -0.0226,
      "num_tokens": 181138897.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.16389724612236023,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 308
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2071.0,
      "completions/mean_length": 687.1428833007812,
      "completions/mean_terminated_length": 589.2996215820312,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 2.8862973760932946,
      "grad_norm": 0.14330852031707764,
      "learning_rate": 1e-06,
      "loss": -0.003,
      "num_tokens": 181739313.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.2055736929178238,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 309
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2574.0,
      "completions/mean_length": 757.286865234375,
      "completions/mean_terminated_length": 641.6270141601562,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 2.8956268221574346,
      "grad_norm": 0.12904781103134155,
      "learning_rate": 1e-06,
      "loss": -0.0127,
      "num_tokens": 182380298.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.20741517841815948,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 310
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3326.0,
      "completions/mean_length": 696.5201416015625,
      "completions/mean_terminated_length": 574.6890258789062,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 2.904956268221574,
      "grad_norm": 0.1322242170572281,
      "learning_rate": 1e-06,
      "loss": -0.0087,
      "num_tokens": 182976588.0,
      "reward": 0.660714328289032,
      "reward_std": 0.16108639538288116,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 311
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2593.0,
      "completions/mean_length": 703.1808471679688,
      "completions/mean_terminated_length": 581.5884399414062,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 2.914285714285714,
      "grad_norm": 0.1302194595336914,
      "learning_rate": 1e-06,
      "loss": -0.0103,
      "num_tokens": 183570006.0,
      "reward": 0.6328125,
      "reward_std": 0.16622911393642426,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 312
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3008.0,
      "completions/mean_length": 846.943115234375,
      "completions/mean_terminated_length": 634.4601440429688,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 2.923615160349854,
      "grad_norm": 0.13785329461097717,
      "learning_rate": 1e-06,
      "loss": -0.0443,
      "num_tokens": 184197243.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.1965164840221405,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 313
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0591517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3083.0,
      "completions/mean_length": 844.5457763671875,
      "completions/mean_terminated_length": 640.1245727539062,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 2.932944606413994,
      "grad_norm": 0.13741254806518555,
      "learning_rate": 1e-06,
      "loss": 0.0042,
      "num_tokens": 184826340.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.19531960785388947,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 314
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3755.0,
      "completions/mean_length": 752.6060791015625,
      "completions/mean_terminated_length": 604.5303344726562,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 2.942274052478134,
      "grad_norm": 0.14405564963817596,
      "learning_rate": 1e-06,
      "loss": 0.0138,
      "num_tokens": 185439739.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.19828493893146515,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 315
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3917.0,
      "completions/mean_length": 671.7924194335938,
      "completions/mean_terminated_length": 581.5784912109375,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 2.951603498542274,
      "grad_norm": 0.13511620461940765,
      "learning_rate": 1e-06,
      "loss": 0.0039,
      "num_tokens": 186042529.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.18201276659965515,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 316
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2425.0,
      "completions/mean_length": 769.5111694335938,
      "completions/mean_terminated_length": 650.2959594726562,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 2.960932944606414,
      "grad_norm": 0.1378905326128006,
      "learning_rate": 1e-06,
      "loss": -0.0012,
      "num_tokens": 186692347.0,
      "reward": 0.574776828289032,
      "reward_std": 0.21744313836097717,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 317
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3056.0,
      "completions/mean_length": 670.747802734375,
      "completions/mean_terminated_length": 560.2557373046875,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 2.970262390670554,
      "grad_norm": 0.13824069499969482,
      "learning_rate": 1e-06,
      "loss": -0.0149,
      "num_tokens": 187267801.0,
      "reward": 0.637276828289032,
      "reward_std": 0.1700248271226883,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 318
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3957.0,
      "completions/mean_length": 743.5469360351562,
      "completions/mean_terminated_length": 599.1455078125,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 2.979591836734694,
      "grad_norm": 0.147800013422966,
      "learning_rate": 1e-06,
      "loss": -0.0195,
      "num_tokens": 187872251.0,
      "reward": 0.598214328289032,
      "reward_std": 0.2073000818490982,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 319
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3505.0,
      "completions/mean_length": 719.9676513671875,
      "completions/mean_terminated_length": 603.0150146484375,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 2.9889212827988336,
      "grad_norm": 0.1389048844575882,
      "learning_rate": 1e-06,
      "loss": -0.0203,
      "num_tokens": 188491166.0,
      "reward": 0.6484375,
      "reward_std": 0.1920802742242813,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 320
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.045454545454545414,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2906.0,
      "completions/mean_length": 778.6278686523438,
      "completions/mean_terminated_length": 620.6577758789062,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 2.9982507288629736,
      "grad_norm": 0.1287021040916443,
      "learning_rate": 1e-06,
      "loss": -0.0073,
      "num_tokens": 189114243.0,
      "reward": 0.6328125,
      "reward_std": 0.17979742586612701,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 321
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3831.0,
      "completions/mean_length": 921.7031860351562,
      "completions/mean_terminated_length": 693.8827514648438,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 3.00932944606414,
      "grad_norm": 0.13434647023677826,
      "learning_rate": 1e-06,
      "loss": -0.0095,
      "num_tokens": 189784617.0,
      "reward": 0.5100446939468384,
      "reward_std": 0.20407551527023315,
      "rewards/verify_math_reward/mean": 0.5100446343421936,
      "rewards/verify_math_reward/std": 0.5001782774925232,
      "step": 322
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0435267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2655.0,
      "completions/mean_length": 795.4654541015625,
      "completions/mean_terminated_length": 645.2660522460938,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 3.01865889212828,
      "grad_norm": 0.12008710950613022,
      "learning_rate": 1e-06,
      "loss": -0.0149,
      "num_tokens": 190420106.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.17209820449352264,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 323
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0457589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4028.0,
      "completions/mean_length": 761.6808471679688,
      "completions/mean_terminated_length": 601.7894897460938,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 3.02798833819242,
      "grad_norm": 0.13155941665172577,
      "learning_rate": 1e-06,
      "loss": -0.0158,
      "num_tokens": 191024300.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.1790817528963089,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652786254883,
      "step": 324
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3180.0,
      "completions/mean_length": 740.3035888671875,
      "completions/mean_terminated_length": 607.9442749023438,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 3.03731778425656,
      "grad_norm": 0.13252215087413788,
      "learning_rate": 1e-06,
      "loss": 0.0011,
      "num_tokens": 191638940.0,
      "reward": 0.629464328289032,
      "reward_std": 0.1604149043560028,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 325
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3468.0,
      "completions/mean_length": 683.068115234375,
      "completions/mean_terminated_length": 577.0276489257812,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 3.0466472303206995,
      "grad_norm": 0.13193079829216003,
      "learning_rate": 1e-06,
      "loss": -0.0047,
      "num_tokens": 192220641.0,
      "reward": 0.6171875,
      "reward_std": 0.16788770258426666,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 326
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3660.0,
      "completions/mean_length": 777.6105346679688,
      "completions/mean_terminated_length": 642.7166137695312,
      "completions/min_length": 180.0,
      "completions/min_terminated_length": 180.0,
      "epoch": 3.0559766763848395,
      "grad_norm": 0.12973052263259888,
      "learning_rate": 1e-06,
      "loss": -0.0079,
      "num_tokens": 192866172.0,
      "reward": 0.609375,
      "reward_std": 0.16398420929908752,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 327
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2679.0,
      "completions/mean_length": 752.8348388671875,
      "completions/mean_terminated_length": 608.8335571289062,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 3.0653061224489795,
      "grad_norm": 0.1253746896982193,
      "learning_rate": 1e-06,
      "loss": -0.0106,
      "num_tokens": 193481912.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.16642162203788757,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 328
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0457589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3062.0,
      "completions/mean_length": 734.8326416015625,
      "completions/mean_terminated_length": 573.65380859375,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 3.0746355685131195,
      "grad_norm": 0.12909363210201263,
      "learning_rate": 1e-06,
      "loss": -0.0075,
      "num_tokens": 194063194.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.15097863972187042,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 329
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.052455357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3504.0,
      "completions/mean_length": 821.8214721679688,
      "completions/mean_terminated_length": 640.5653686523438,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 3.0839650145772595,
      "grad_norm": 0.13262464106082916,
      "learning_rate": 1e-06,
      "loss": -0.0178,
      "num_tokens": 194702770.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.20598556101322174,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 330
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3871.0,
      "completions/mean_length": 725.6373291015625,
      "completions/mean_terminated_length": 600.8090209960938,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 3.0932944606413995,
      "grad_norm": 0.1538906842470169,
      "learning_rate": 1e-06,
      "loss": -0.0009,
      "num_tokens": 195308645.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.20779426395893097,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 331
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2818.0,
      "completions/mean_length": 733.4788208007812,
      "completions/mean_terminated_length": 600.850341796875,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 3.1026239067055394,
      "grad_norm": 0.13098306953907013,
      "learning_rate": 1e-06,
      "loss": -0.0074,
      "num_tokens": 195913794.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.17325936257839203,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 332
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2617.0,
      "completions/mean_length": 677.46875,
      "completions/mean_terminated_length": 542.6310424804688,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 3.1119533527696794,
      "grad_norm": 0.12563659250736237,
      "learning_rate": 1e-06,
      "loss": -0.0169,
      "num_tokens": 196476870.0,
      "reward": 0.660714328289032,
      "reward_std": 0.148612841963768,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313389778137,
      "step": 333
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3885.0,
      "completions/mean_length": 813.8158569335938,
      "completions/mean_terminated_length": 648.35986328125,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 3.1212827988338194,
      "grad_norm": 0.13097511231899261,
      "learning_rate": 1e-06,
      "loss": -0.0096,
      "num_tokens": 197130753.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.17179329693317413,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 334
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3680.0,
      "completions/mean_length": 707.6663208007812,
      "completions/mean_terminated_length": 561.7194213867188,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 3.130612244897959,
      "grad_norm": 0.14367610216140747,
      "learning_rate": 1e-06,
      "loss": -0.0061,
      "num_tokens": 197694814.0,
      "reward": 0.6930803656578064,
      "reward_std": 0.18543826043605804,
      "rewards/verify_math_reward/mean": 0.6930803656578064,
      "rewards/verify_math_reward/std": 0.46147334575653076,
      "step": 335
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3441.0,
      "completions/mean_length": 784.130615234375,
      "completions/mean_terminated_length": 637.4510498046875,
      "completions/min_length": 194.0,
      "completions/min_terminated_length": 194.0,
      "epoch": 3.139941690962099,
      "grad_norm": 0.12637105584144592,
      "learning_rate": 1e-06,
      "loss": -0.0138,
      "num_tokens": 198332083.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.1907668560743332,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 336
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.041294642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2695.0,
      "completions/mean_length": 718.239990234375,
      "completions/mean_terminated_length": 572.74853515625,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 3.149271137026239,
      "grad_norm": 0.13643290102481842,
      "learning_rate": 1e-06,
      "loss": -0.0233,
      "num_tokens": 198911378.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.17818161845207214,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 337
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3612.0,
      "completions/mean_length": 953.9129638671875,
      "completions/mean_terminated_length": 699.9686279296875,
      "completions/min_length": 94.0,
      "completions/min_terminated_length": 94.0,
      "epoch": 3.158600583090379,
      "grad_norm": 0.14042812585830688,
      "learning_rate": 1e-06,
      "loss": -0.0225,
      "num_tokens": 199606236.0,
      "reward": 0.5078125,
      "reward_std": 0.20572523772716522,
      "rewards/verify_math_reward/mean": 0.5078125,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 338
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.046875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3683.0,
      "completions/mean_length": 801.0625610351562,
      "completions/mean_terminated_length": 639.016357421875,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 3.167930029154519,
      "grad_norm": 0.12723299860954285,
      "learning_rate": 1e-06,
      "loss": 0.0063,
      "num_tokens": 200244484.0,
      "reward": 0.625,
      "reward_std": 0.1662386953830719,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 339
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.049107142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3886.0,
      "completions/mean_length": 784.4241333007812,
      "completions/mean_terminated_length": 613.4037475585938,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 3.177259475218659,
      "grad_norm": 0.14140678942203522,
      "learning_rate": 1e-06,
      "loss": -0.01,
      "num_tokens": 200852000.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.18426865339279175,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111400604248,
      "step": 340
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3868.0,
      "completions/mean_length": 730.6752319335938,
      "completions/mean_terminated_length": 622.1163330078125,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 3.186588921282799,
      "grad_norm": 0.1404697149991989,
      "learning_rate": 1e-06,
      "loss": 0.0014,
      "num_tokens": 201474669.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.2093295454978943,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 341
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0535714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3940.0,
      "completions/mean_length": 855.9699096679688,
      "completions/mean_terminated_length": 672.5719604492188,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 3.195918367346939,
      "grad_norm": 0.1275302618741989,
      "learning_rate": 1e-06,
      "loss": -0.0152,
      "num_tokens": 202138482.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.20038749277591705,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 342
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2908.0,
      "completions/mean_length": 827.2846069335938,
      "completions/mean_terminated_length": 613.5160522460938,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 3.205247813411079,
      "grad_norm": 0.14738671481609344,
      "learning_rate": 1e-06,
      "loss": -0.0267,
      "num_tokens": 202740553.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.21538016200065613,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975653409957886,
      "step": 343
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.046875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2358.0,
      "completions/mean_length": 794.0335083007812,
      "completions/mean_terminated_length": 631.6416625976562,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 3.2145772594752184,
      "grad_norm": 0.13328631222248077,
      "learning_rate": 1e-06,
      "loss": -0.0075,
      "num_tokens": 203368535.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.17145699262619019,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 344
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0401785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3802.0,
      "completions/mean_length": 832.3995971679688,
      "completions/mean_terminated_length": 695.78369140625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 3.2239067055393584,
      "grad_norm": 0.1348450630903244,
      "learning_rate": 1e-06,
      "loss": -0.0015,
      "num_tokens": 204061693.0,
      "reward": 0.574776828289032,
      "reward_std": 0.18201346695423126,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 345
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0457589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2567.0,
      "completions/mean_length": 774.4877319335938,
      "completions/mean_terminated_length": 615.2105102539062,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 3.2332361516034984,
      "grad_norm": 0.13413555920124054,
      "learning_rate": 1e-06,
      "loss": 0.0046,
      "num_tokens": 204671650.0,
      "reward": 0.660714328289032,
      "reward_std": 0.14530597627162933,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 346
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.046875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3891.0,
      "completions/mean_length": 791.2344360351562,
      "completions/mean_terminated_length": 628.7048950195312,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 3.2425655976676384,
      "grad_norm": 0.13582055270671844,
      "learning_rate": 1e-06,
      "loss": -0.0156,
      "num_tokens": 205299796.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.19294606149196625,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791125416755676,
      "step": 347
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2645.0,
      "completions/mean_length": 706.6217041015625,
      "completions/mean_terminated_length": 572.933837890625,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 3.2518950437317784,
      "grad_norm": 0.13521024584770203,
      "learning_rate": 1e-06,
      "loss": -0.0113,
      "num_tokens": 205884865.0,
      "reward": 0.645089328289032,
      "reward_std": 0.18464843928813934,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 348
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4049.0,
      "completions/mean_length": 996.7288208007812,
      "completions/mean_terminated_length": 721.8238525390625,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 3.2612244897959184,
      "grad_norm": 0.12205115705728531,
      "learning_rate": 1e-06,
      "loss": -0.0238,
      "num_tokens": 206573774.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.17626340687274933,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 349
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.049107142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2760.0,
      "completions/mean_length": 789.8225708007812,
      "completions/mean_terminated_length": 619.0809936523438,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 3.2705539358600584,
      "grad_norm": 0.13562090694904327,
      "learning_rate": 1e-06,
      "loss": -0.027,
      "num_tokens": 207193695.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.18370482325553894,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 350
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3762.0,
      "completions/mean_length": 830.6975708007812,
      "completions/mean_terminated_length": 637.7127685546875,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 3.2798833819241984,
      "grad_norm": 0.13589246571063995,
      "learning_rate": 1e-06,
      "loss": -0.0036,
      "num_tokens": 207819856.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.1640915721654892,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761127948761,
      "step": 351
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.052455357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2989.0,
      "completions/mean_length": 877.3136596679688,
      "completions/mean_terminated_length": 699.1295776367188,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 3.2892128279883384,
      "grad_norm": 0.1251857578754425,
      "learning_rate": 1e-06,
      "loss": -0.0371,
      "num_tokens": 208508065.0,
      "reward": 0.6015625,
      "reward_std": 0.19414816796779633,
      "rewards/verify_math_reward/mean": 0.6015625,
      "rewards/verify_math_reward/std": 0.48984986543655396,
      "step": 352
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0591517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3697.0,
      "completions/mean_length": 823.2980346679688,
      "completions/mean_terminated_length": 617.5409545898438,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 3.298542274052478,
      "grad_norm": 0.14291563630104065,
      "learning_rate": 1e-06,
      "loss": -0.0214,
      "num_tokens": 209122172.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.19520379602909088,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199838399887085,
      "step": 353
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4090.0,
      "completions/mean_length": 848.1451416015625,
      "completions/mean_terminated_length": 656.1914672851562,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 3.307871720116618,
      "grad_norm": 0.13552476465702057,
      "learning_rate": 1e-06,
      "loss": -0.0086,
      "num_tokens": 209768230.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.19181881844997406,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 354
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3176.0,
      "completions/mean_length": 659.3795166015625,
      "completions/mean_terminated_length": 560.7393798828125,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 3.317201166180758,
      "grad_norm": 0.13598616421222687,
      "learning_rate": 1e-06,
      "loss": 0.0058,
      "num_tokens": 210334002.0,
      "reward": 0.7042410969734192,
      "reward_std": 0.14478211104869843,
      "rewards/verify_math_reward/mean": 0.7042410969734192,
      "rewards/verify_math_reward/std": 0.45663803815841675,
      "step": 355
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0457589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3969.0,
      "completions/mean_length": 791.5814819335938,
      "completions/mean_terminated_length": 633.1239624023438,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 3.326530612244898,
      "grad_norm": 0.1426772177219391,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 210966083.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.20719128847122192,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 356
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0535714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3949.0,
      "completions/mean_length": 876.513427734375,
      "completions/mean_terminated_length": 694.2783203125,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 3.335860058309038,
      "grad_norm": 0.13519161939620972,
      "learning_rate": 1e-06,
      "loss": -0.0165,
      "num_tokens": 211651935.0,
      "reward": 0.546875,
      "reward_std": 0.19663341343402863,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 357
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3788.0,
      "completions/mean_length": 769.3594360351562,
      "completions/mean_terminated_length": 601.662353515625,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 3.345189504373178,
      "grad_norm": 0.14134107530117035,
      "learning_rate": 1e-06,
      "loss": -0.0208,
      "num_tokens": 212248921.0,
      "reward": 0.65625,
      "reward_std": 0.18475720286369324,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 358
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3232.0,
      "completions/mean_length": 898.0324096679688,
      "completions/mean_terminated_length": 656.1692504882812,
      "completions/min_length": 94.0,
      "completions/min_terminated_length": 94.0,
      "epoch": 3.354518950437318,
      "grad_norm": 0.12622913718223572,
      "learning_rate": 1e-06,
      "loss": -0.0161,
      "num_tokens": 212890230.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.16855987906455994,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263837933540344,
      "step": 359
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0636160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3392.0,
      "completions/mean_length": 845.3672485351562,
      "completions/mean_terminated_length": 624.525634765625,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 3.363848396501458,
      "grad_norm": 0.13135181367397308,
      "learning_rate": 1e-06,
      "loss": -0.0056,
      "num_tokens": 213505119.0,
      "reward": 0.6171875,
      "reward_std": 0.16390681266784668,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 360
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3638.0,
      "completions/mean_length": 812.7600708007812,
      "completions/mean_terminated_length": 585.5191040039062,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 3.373177842565598,
      "grad_norm": 0.14955846965312958,
      "learning_rate": 1e-06,
      "loss": -0.0245,
      "num_tokens": 214096504.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.18513557314872742,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 361
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3908.0,
      "completions/mean_length": 715.8527221679688,
      "completions/mean_terminated_length": 582.5289916992188,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 3.3825072886297374,
      "grad_norm": 0.15491895377635956,
      "learning_rate": 1e-06,
      "loss": -0.0,
      "num_tokens": 214684796.0,
      "reward": 0.645089328289032,
      "reward_std": 0.21222344040870667,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 362
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2995.0,
      "completions/mean_length": 782.0413208007812,
      "completions/mean_terminated_length": 635.2692260742188,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 3.3918367346938774,
      "grad_norm": 0.14674150943756104,
      "learning_rate": 1e-06,
      "loss": -0.0152,
      "num_tokens": 215319953.0,
      "reward": 0.543526828289032,
      "reward_std": 0.20241807401180267,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 363
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3937.0,
      "completions/mean_length": 815.5201416015625,
      "completions/mean_terminated_length": 621.6383056640625,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 3.4011661807580174,
      "grad_norm": 0.13411541283130646,
      "learning_rate": 1e-06,
      "loss": 0.002,
      "num_tokens": 215943707.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.17900757491588593,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223822355270386,
      "step": 364
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0435267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3950.0,
      "completions/mean_length": 784.443115234375,
      "completions/mean_terminated_length": 633.7421264648438,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 3.4104956268221573,
      "grad_norm": 0.12934422492980957,
      "learning_rate": 1e-06,
      "loss": -0.0098,
      "num_tokens": 216571784.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.16473278403282166,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 365
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3971.0,
      "completions/mean_length": 796.396240234375,
      "completions/mean_terminated_length": 678.1444702148438,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 3.4198250728862973,
      "grad_norm": 0.1159258708357811,
      "learning_rate": 1e-06,
      "loss": 0.0024,
      "num_tokens": 217260379.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.18103523552417755,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796950817108154,
      "step": 366
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.060267857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3105.0,
      "completions/mean_length": 846.7154541015625,
      "completions/mean_terminated_length": 638.3289794921875,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 3.4291545189504373,
      "grad_norm": 0.1376204788684845,
      "learning_rate": 1e-06,
      "loss": -0.0124,
      "num_tokens": 217896060.0,
      "reward": 0.5859375,
      "reward_std": 0.16029544174671173,
      "rewards/verify_math_reward/mean": 0.5859375,
      "rewards/verify_math_reward/std": 0.4928344786167145,
      "step": 367
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0379464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2955.0,
      "completions/mean_length": 704.5178833007812,
      "completions/mean_terminated_length": 570.7470703125,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 3.4384839650145773,
      "grad_norm": 0.14520323276519775,
      "learning_rate": 1e-06,
      "loss": -0.0186,
      "num_tokens": 218472124.0,
      "reward": 0.6930803656578064,
      "reward_std": 0.17577669024467468,
      "rewards/verify_math_reward/mean": 0.6930803656578064,
      "rewards/verify_math_reward/std": 0.46147337555885315,
      "step": 368
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2264.0,
      "completions/mean_length": 805.7455444335938,
      "completions/mean_terminated_length": 578.0191040039062,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 3.4478134110787173,
      "grad_norm": 0.1559911072254181,
      "learning_rate": 1e-06,
      "loss": -0.0396,
      "num_tokens": 219056856.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.1884777694940567,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 369
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0591517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3876.0,
      "completions/mean_length": 855.8839721679688,
      "completions/mean_terminated_length": 652.1755981445312,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 3.4571428571428573,
      "grad_norm": 0.13544338941574097,
      "learning_rate": 1e-06,
      "loss": -0.028,
      "num_tokens": 219692504.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.1836727261543274,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 370
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.033482142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3912.0,
      "completions/mean_length": 712.3582763671875,
      "completions/mean_terminated_length": 595.1420288085938,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 3.466472303206997,
      "grad_norm": 0.12585312128067017,
      "learning_rate": 1e-06,
      "loss": 0.0096,
      "num_tokens": 220293105.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.17404918372631073,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 371
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3040.0,
      "completions/mean_length": 826.2098388671875,
      "completions/mean_terminated_length": 608.2238159179688,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 3.4758017492711373,
      "grad_norm": 0.14365816116333008,
      "learning_rate": 1e-06,
      "loss": -0.0285,
      "num_tokens": 220900005.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.20459476113319397,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791125416755676,
      "step": 372
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.056919642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3636.0,
      "completions/mean_length": 827.7076416015625,
      "completions/mean_terminated_length": 630.44970703125,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 3.485131195335277,
      "grad_norm": 0.14375828206539154,
      "learning_rate": 1e-06,
      "loss": -0.018,
      "num_tokens": 221522103.0,
      "reward": 0.613839328289032,
      "reward_std": 0.18352049589157104,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 373
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3994.0,
      "completions/mean_length": 876.318115234375,
      "completions/mean_terminated_length": 632.8126831054688,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 3.494460641399417,
      "grad_norm": 0.13252145051956177,
      "learning_rate": 1e-06,
      "loss": -0.0201,
      "num_tokens": 222146820.0,
      "reward": 0.6015625,
      "reward_std": 0.15887469053268433,
      "rewards/verify_math_reward/mean": 0.6015625,
      "rewards/verify_math_reward/std": 0.48984986543655396,
      "step": 374
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0535714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3752.0,
      "completions/mean_length": 815.950927734375,
      "completions/mean_terminated_length": 630.2877197265625,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 3.503790087463557,
      "grad_norm": 0.1298367977142334,
      "learning_rate": 1e-06,
      "loss": -0.0095,
      "num_tokens": 222759280.0,
      "reward": 0.566964328289032,
      "reward_std": 0.16679365932941437,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 375
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.060267857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3828.0,
      "completions/mean_length": 863.8660888671875,
      "completions/mean_terminated_length": 656.57958984375,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 3.513119533527697,
      "grad_norm": 0.13564053177833557,
      "learning_rate": 1e-06,
      "loss": -0.0202,
      "num_tokens": 223400536.0,
      "reward": 0.645089328289032,
      "reward_std": 0.19433686137199402,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 376
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.049107142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3322.0,
      "completions/mean_length": 843.8370971679688,
      "completions/mean_terminated_length": 675.885009765625,
      "completions/min_length": 81.0,
      "completions/min_terminated_length": 81.0,
      "epoch": 3.522448979591837,
      "grad_norm": 0.14007282257080078,
      "learning_rate": 1e-06,
      "loss": -0.0381,
      "num_tokens": 224072686.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.19309763610363007,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 377
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3827.0,
      "completions/mean_length": 936.2188110351562,
      "completions/mean_terminated_length": 630.6829833984375,
      "completions/min_length": 189.0,
      "completions/min_terminated_length": 189.0,
      "epoch": 3.5317784256559768,
      "grad_norm": 0.14223435521125793,
      "learning_rate": 1e-06,
      "loss": -0.0286,
      "num_tokens": 224682810.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.18006137013435364,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 378
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.049107142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3849.0,
      "completions/mean_length": 807.4642944335938,
      "completions/mean_terminated_length": 637.6337890625,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 3.5411078717201168,
      "grad_norm": 0.14385223388671875,
      "learning_rate": 1e-06,
      "loss": -0.0125,
      "num_tokens": 225310714.0,
      "reward": 0.6640625,
      "reward_std": 0.20113424956798553,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 379
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2413.0,
      "completions/mean_length": 818.2366333007812,
      "completions/mean_terminated_length": 599.7190551757812,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 3.5504373177842563,
      "grad_norm": 0.1361023336648941,
      "learning_rate": 1e-06,
      "loss": -0.0249,
      "num_tokens": 225914558.0,
      "reward": 0.606026828289032,
      "reward_std": 0.1762627214193344,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 380
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3871.0,
      "completions/mean_length": 701.216552734375,
      "completions/mean_terminated_length": 595.7399291992188,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 3.5597667638483967,
      "grad_norm": 0.15030620992183685,
      "learning_rate": 1e-06,
      "loss": 0.0088,
      "num_tokens": 226515488.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.20752577483654022,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900800228119,
      "step": 381
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0546875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2559.0,
      "completions/mean_length": 805.2801513671875,
      "completions/mean_terminated_length": 614.9078979492188,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 3.5690962099125363,
      "grad_norm": 0.13644549250602722,
      "learning_rate": 1e-06,
      "loss": -0.0447,
      "num_tokens": 227127147.0,
      "reward": 0.598214328289032,
      "reward_std": 0.1886281818151474,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 382
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2530.0,
      "completions/mean_length": 751.3248291015625,
      "completions/mean_terminated_length": 615.3623657226562,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 3.5784256559766763,
      "grad_norm": 0.1410011351108551,
      "learning_rate": 1e-06,
      "loss": -0.0067,
      "num_tokens": 227752982.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.19899921119213104,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 383
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3538.0,
      "completions/mean_length": 792.4922485351562,
      "completions/mean_terminated_length": 597.2493896484375,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 3.5877551020408163,
      "grad_norm": 0.14608652889728546,
      "learning_rate": 1e-06,
      "loss": 0.0001,
      "num_tokens": 228351775.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.16453734040260315,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159182548523,
      "step": 384
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0457589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2929.0,
      "completions/mean_length": 772.9308471679688,
      "completions/mean_terminated_length": 613.5789184570312,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 3.5970845481049563,
      "grad_norm": 0.13708025217056274,
      "learning_rate": 1e-06,
      "loss": -0.0003,
      "num_tokens": 228969657.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.17577669024467468,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.4846802353858948,
      "step": 385
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.046875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3409.0,
      "completions/mean_length": 806.5625610351562,
      "completions/mean_terminated_length": 644.786865234375,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 3.6064139941690962,
      "grad_norm": 0.13801677525043488,
      "learning_rate": 1e-06,
      "loss": -0.0027,
      "num_tokens": 229603593.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.1791912168264389,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 386
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0591517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2749.0,
      "completions/mean_length": 845.9074096679688,
      "completions/mean_terminated_length": 641.57177734375,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 3.6157434402332362,
      "grad_norm": 0.13870160281658173,
      "learning_rate": 1e-06,
      "loss": -0.0152,
      "num_tokens": 230234822.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.20790626108646393,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644899368286,
      "step": 387
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3625.0,
      "completions/mean_length": 825.7779541015625,
      "completions/mean_terminated_length": 632.5023803710938,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 3.6250728862973762,
      "grad_norm": 0.14085707068443298,
      "learning_rate": 1e-06,
      "loss": -0.0008,
      "num_tokens": 230859255.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.18580886721611023,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 388
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0401785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2616.0,
      "completions/mean_length": 697.0502319335938,
      "completions/mean_terminated_length": 554.7686157226562,
      "completions/min_length": 98.0,
      "completions/min_terminated_length": 98.0,
      "epoch": 3.6344023323615158,
      "grad_norm": 0.15169435739517212,
      "learning_rate": 1e-06,
      "loss": -0.0167,
      "num_tokens": 231425676.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.18716758489608765,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219160199165344,
      "step": 389
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3494.0,
      "completions/mean_length": 888.3616333007812,
      "completions/mean_terminated_length": 674.51904296875,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 3.643731778425656,
      "grad_norm": 0.14200334250926971,
      "learning_rate": 1e-06,
      "loss": 0.0048,
      "num_tokens": 232072504.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.21560657024383545,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 390
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2384.0,
      "completions/mean_length": 844.3281860351562,
      "completions/mean_terminated_length": 615.1182861328125,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 3.6530612244897958,
      "grad_norm": 0.14360344409942627,
      "learning_rate": 1e-06,
      "loss": -0.0092,
      "num_tokens": 232673902.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.20388302206993103,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 391
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3132.0,
      "completions/mean_length": 868.8236694335938,
      "completions/mean_terminated_length": 641.3404541015625,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 3.6623906705539357,
      "grad_norm": 0.13329817354679108,
      "learning_rate": 1e-06,
      "loss": -0.0339,
      "num_tokens": 233309688.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.16183848679065704,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 392
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3744.0,
      "completions/mean_length": 700.9074096679688,
      "completions/mean_terminated_length": 591.3882446289062,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 3.6717201166180757,
      "grad_norm": 0.11687356978654861,
      "learning_rate": 1e-06,
      "loss": -0.0187,
      "num_tokens": 233918197.0,
      "reward": 0.7064732313156128,
      "reward_std": 0.1381234973669052,
      "rewards/verify_math_reward/mean": 0.7064732313156128,
      "rewards/verify_math_reward/std": 0.4556320011615753,
      "step": 393
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3973.0,
      "completions/mean_length": 798.2098388671875,
      "completions/mean_terminated_length": 644.1074829101562,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 3.6810495626822157,
      "grad_norm": 0.13066112995147705,
      "learning_rate": 1e-06,
      "loss": -0.0009,
      "num_tokens": 234562081.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.18340173363685608,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 394
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3591.0,
      "completions/mean_length": 863.0379638671875,
      "completions/mean_terminated_length": 639.27685546875,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 3.6903790087463557,
      "grad_norm": 0.12336290627717972,
      "learning_rate": 1e-06,
      "loss": -0.0232,
      "num_tokens": 235182019.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.152669295668602,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 395
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0636160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3988.0,
      "completions/mean_length": 813.911865234375,
      "completions/mean_terminated_length": 590.9332275390625,
      "completions/min_length": 190.0,
      "completions/min_terminated_length": 190.0,
      "epoch": 3.6997084548104957,
      "grad_norm": 0.13793021440505981,
      "learning_rate": 1e-06,
      "loss": -0.0293,
      "num_tokens": 235766756.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.17107973992824554,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111123085022,
      "step": 396
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3688.0,
      "completions/mean_length": 735.0938110351562,
      "completions/mean_terminated_length": 586.242431640625,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 3.7090379008746357,
      "grad_norm": 0.1464046835899353,
      "learning_rate": 1e-06,
      "loss": 0.0095,
      "num_tokens": 236356112.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.1889663189649582,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 397
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.060267857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3841.0,
      "completions/mean_length": 872.0123291015625,
      "completions/mean_terminated_length": 665.2482299804688,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 3.7183673469387752,
      "grad_norm": 0.1336894929409027,
      "learning_rate": 1e-06,
      "loss": -0.0191,
      "num_tokens": 237006867.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.18799132108688354,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 398
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0323660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3535.0,
      "completions/mean_length": 691.0391235351562,
      "completions/mean_terminated_length": 577.1476440429688,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 3.7276967930029157,
      "grad_norm": 0.11745762825012207,
      "learning_rate": 1e-06,
      "loss": -0.0188,
      "num_tokens": 237587446.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.12320679426193237,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 399
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2893.0,
      "completions/mean_length": 780.8047485351562,
      "completions/mean_terminated_length": 633.9778442382812,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 3.7370262390670552,
      "grad_norm": 0.15450018644332886,
      "learning_rate": 1e-06,
      "loss": -0.0074,
      "num_tokens": 238211383.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.2320307046175003,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 400
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.052455357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3241.0,
      "completions/mean_length": 803.6082763671875,
      "completions/mean_terminated_length": 621.3439331054688,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 3.746355685131195,
      "grad_norm": 0.12850894033908844,
      "learning_rate": 1e-06,
      "loss": -0.0054,
      "num_tokens": 238820568.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.16037212312221527,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 401
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2589.0,
      "completions/mean_length": 761.5379638671875,
      "completions/mean_terminated_length": 642.0369873046875,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 3.755685131195335,
      "grad_norm": 0.12745584547519684,
      "learning_rate": 1e-06,
      "loss": -0.0159,
      "num_tokens": 239461130.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.17607979476451874,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.4816865026950836,
      "step": 402
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3306.0,
      "completions/mean_length": 825.7422485351562,
      "completions/mean_terminated_length": 632.4645385742188,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 3.765014577259475,
      "grad_norm": 0.15667924284934998,
      "learning_rate": 1e-06,
      "loss": -0.0284,
      "num_tokens": 240076939.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.21045538783073425,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159480571747,
      "step": 403
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2968.0,
      "completions/mean_length": 906.87841796875,
      "completions/mean_terminated_length": 644.9697875976562,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 3.774344023323615,
      "grad_norm": 0.13290569186210632,
      "learning_rate": 1e-06,
      "loss": -0.0356,
      "num_tokens": 240695558.0,
      "reward": 0.65625,
      "reward_std": 0.15063981711864471,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 404
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4044.0,
      "completions/mean_length": 851.2489013671875,
      "completions/mean_terminated_length": 639.0475463867188,
      "completions/min_length": 179.0,
      "completions/min_terminated_length": 179.0,
      "epoch": 3.783673469387755,
      "grad_norm": 0.1469256579875946,
      "learning_rate": 1e-06,
      "loss": -0.0167,
      "num_tokens": 241334309.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.21905934810638428,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 405
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3180.0,
      "completions/mean_length": 818.7745971679688,
      "completions/mean_terminated_length": 625.0850830078125,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 3.793002915451895,
      "grad_norm": 0.13795356452465057,
      "learning_rate": 1e-06,
      "loss": -0.0188,
      "num_tokens": 241942995.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.18986350297927856,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 406
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0502232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4073.0,
      "completions/mean_length": 825.0558471679688,
      "completions/mean_terminated_length": 652.0916748046875,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 3.8023323615160347,
      "grad_norm": 0.13587205111980438,
      "learning_rate": 1e-06,
      "loss": -0.0142,
      "num_tokens": 242579757.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.1813715398311615,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 407
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0513392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3190.0,
      "completions/mean_length": 764.341552734375,
      "completions/mean_terminated_length": 584.0399780273438,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 3.811661807580175,
      "grad_norm": 0.16450262069702148,
      "learning_rate": 1e-06,
      "loss": -0.0204,
      "num_tokens": 243155263.0,
      "reward": 0.640625,
      "reward_std": 0.1904633492231369,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 408
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3973.0,
      "completions/mean_length": 964.9732666015625,
      "completions/mean_terminated_length": 687.2515258789062,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 3.8209912536443147,
      "grad_norm": 0.12772898375988007,
      "learning_rate": 1e-06,
      "loss": -0.0336,
      "num_tokens": 243807335.0,
      "reward": 0.5390625,
      "reward_std": 0.18554674088954926,
      "rewards/verify_math_reward/mean": 0.5390625,
      "rewards/verify_math_reward/std": 0.4987502098083496,
      "step": 409
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2786.0,
      "completions/mean_length": 890.90966796875,
      "completions/mean_terminated_length": 648.5078125,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 3.8303206997084547,
      "grad_norm": 0.1236671730875969,
      "learning_rate": 1e-06,
      "loss": -0.0305,
      "num_tokens": 244439062.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.1407584697008133,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 410
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0636160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2679.0,
      "completions/mean_length": 824.6395263671875,
      "completions/mean_terminated_length": 602.3897705078125,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 3.8396501457725947,
      "grad_norm": 0.12373624742031097,
      "learning_rate": 1e-06,
      "loss": -0.0081,
      "num_tokens": 245029283.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.149286150932312,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 411
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3963.0,
      "completions/mean_length": 812.9129638671875,
      "completions/mean_terminated_length": 618.8770751953125,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 3.8489795918367347,
      "grad_norm": 0.12474401295185089,
      "learning_rate": 1e-06,
      "loss": -0.0275,
      "num_tokens": 245637429.0,
      "reward": 0.637276828289032,
      "reward_std": 0.1511622667312622,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 412
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0546875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3725.0,
      "completions/mean_length": 758.2020263671875,
      "completions/mean_terminated_length": 565.1062622070312,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 3.8583090379008746,
      "grad_norm": 0.1259421408176422,
      "learning_rate": 1e-06,
      "loss": -0.0023,
      "num_tokens": 246195354.0,
      "reward": 0.7120535969734192,
      "reward_std": 0.16214017570018768,
      "rewards/verify_math_reward/mean": 0.7120535969734192,
      "rewards/verify_math_reward/std": 0.4530588984489441,
      "step": 413
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0424107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3854.0,
      "completions/mean_length": 782.7678833007812,
      "completions/mean_terminated_length": 636.0280151367188,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 3.8676384839650146,
      "grad_norm": 0.1368919163942337,
      "learning_rate": 1e-06,
      "loss": -0.0193,
      "num_tokens": 246836738.0,
      "reward": 0.6328125,
      "reward_std": 0.1842258721590042,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 414
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3937.0,
      "completions/mean_length": 812.1361694335938,
      "completions/mean_terminated_length": 563.7766723632812,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 3.8769679300291546,
      "grad_norm": 0.1541055142879486,
      "learning_rate": 1e-06,
      "loss": -0.0267,
      "num_tokens": 247390324.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.1791156381368637,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 415
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4034.0,
      "completions/mean_length": 1001.8270263671875,
      "completions/mean_terminated_length": 656.3237915039062,
      "completions/min_length": 194.0,
      "completions/min_terminated_length": 194.0,
      "epoch": 3.8862973760932946,
      "grad_norm": 0.14875942468643188,
      "learning_rate": 1e-06,
      "loss": -0.033,
      "num_tokens": 248015425.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.18051347136497498,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 416
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3460.0,
      "completions/mean_length": 947.64404296875,
      "completions/mean_terminated_length": 626.22509765625,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 3.8956268221574346,
      "grad_norm": 0.15762212872505188,
      "learning_rate": 1e-06,
      "loss": -0.0303,
      "num_tokens": 248617754.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.19588413834571838,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 417
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0513392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3648.0,
      "completions/mean_length": 772.6361694335938,
      "completions/mean_terminated_length": 592.7835083007812,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 3.904956268221574,
      "grad_norm": 0.14572742581367493,
      "learning_rate": 1e-06,
      "loss": -0.0137,
      "num_tokens": 249213132.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.1770893931388855,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 418
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3839.0,
      "completions/mean_length": 958.3995971679688,
      "completions/mean_terminated_length": 655.0086059570312,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 3.914285714285714,
      "grad_norm": 0.14375333487987518,
      "learning_rate": 1e-06,
      "loss": -0.043,
      "num_tokens": 249833482.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.1910271793603897,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.4876568913459778,
      "step": 419
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2972.0,
      "completions/mean_length": 814.8326416015625,
      "completions/mean_terminated_length": 575.1305541992188,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 3.923615160349854,
      "grad_norm": 0.1448078751564026,
      "learning_rate": 1e-06,
      "loss": -0.0308,
      "num_tokens": 250401628.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.1954641044139862,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 420
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3747.0,
      "completions/mean_length": 934.40966796875,
      "completions/mean_terminated_length": 653.9769287109375,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 3.932944606413994,
      "grad_norm": 0.1312483698129654,
      "learning_rate": 1e-06,
      "loss": -0.0277,
      "num_tokens": 251033115.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.17070813477039337,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 421
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2844.0,
      "completions/mean_length": 938.62841796875,
      "completions/mean_terminated_length": 624.8282470703125,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 3.942274052478134,
      "grad_norm": 0.13763560354709625,
      "learning_rate": 1e-06,
      "loss": -0.024,
      "num_tokens": 251628358.0,
      "reward": 0.598214328289032,
      "reward_std": 0.17468014359474182,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053290486335754,
      "step": 422
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3104.0,
      "completions/mean_length": 877.888427734375,
      "completions/mean_terminated_length": 634.5017700195312,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 3.951603498542274,
      "grad_norm": 0.1306796371936798,
      "learning_rate": 1e-06,
      "loss": -0.0224,
      "num_tokens": 252248058.0,
      "reward": 0.590401828289032,
      "reward_std": 0.1441427320241928,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 423
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3405.0,
      "completions/mean_length": 783.1819458007812,
      "completions/mean_terminated_length": 566.5291137695312,
      "completions/min_length": 180.0,
      "completions/min_terminated_length": 180.0,
      "epoch": 3.960932944606414,
      "grad_norm": 0.14820237457752228,
      "learning_rate": 1e-06,
      "loss": -0.0039,
      "num_tokens": 252807021.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.17731650173664093,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 424
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2686.0,
      "completions/mean_length": 813.3392944335938,
      "completions/mean_terminated_length": 647.8593139648438,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 3.970262390670554,
      "grad_norm": 0.1265283226966858,
      "learning_rate": 1e-06,
      "loss": -0.0025,
      "num_tokens": 253448237.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.14789676666259766,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 425
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3163.0,
      "completions/mean_length": 817.4107666015625,
      "completions/mean_terminated_length": 552.4342651367188,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 3.979591836734694,
      "grad_norm": 0.15848904848098755,
      "learning_rate": 1e-06,
      "loss": -0.0176,
      "num_tokens": 253996485.0,
      "reward": 0.65625,
      "reward_std": 0.1633433997631073,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 426
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3300.0,
      "completions/mean_length": 928.4308471679688,
      "completions/mean_terminated_length": 605.0504150390625,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 3.9889212827988336,
      "grad_norm": 0.14289753139019012,
      "learning_rate": 1e-06,
      "loss": -0.0418,
      "num_tokens": 254578759.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.18231727182865143,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 427
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03693181818181823,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3822.0,
      "completions/mean_length": 729.0454711914062,
      "completions/mean_terminated_length": 599.92919921875,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 3.9982507288629736,
      "grad_norm": 0.14556092023849487,
      "learning_rate": 1e-06,
      "loss": -0.0337,
      "num_tokens": 255159623.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.18644899129867554,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 428
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2074.0,
      "completions/mean_length": 853.6361694335938,
      "completions/mean_terminated_length": 574.5963745117188,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 4.0093294460641395,
      "grad_norm": 0.15588702261447906,
      "learning_rate": 1e-06,
      "loss": -0.0277,
      "num_tokens": 255727761.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.17585016787052155,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 429
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2577.0,
      "completions/mean_length": 804.4207763671875,
      "completions/mean_terminated_length": 638.4912109375,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 4.01865889212828,
      "grad_norm": 0.1348199099302292,
      "learning_rate": 1e-06,
      "loss": -0.0125,
      "num_tokens": 256357130.0,
      "reward": 0.609375,
      "reward_std": 0.16781283915042877,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 430
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0636160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3073.0,
      "completions/mean_length": 843.4207763671875,
      "completions/mean_terminated_length": 622.4469604492188,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 4.0279883381924195,
      "grad_norm": 0.14399297535419464,
      "learning_rate": 1e-06,
      "loss": -0.0185,
      "num_tokens": 256957755.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.17336954176425934,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 431
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0591517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3801.0,
      "completions/mean_length": 859.8326416015625,
      "completions/mean_terminated_length": 656.3724975585938,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 4.03731778425656,
      "grad_norm": 0.1406911313533783,
      "learning_rate": 1e-06,
      "loss": -0.0198,
      "num_tokens": 257597061.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.20203858613967896,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 432
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.060267857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3694.0,
      "completions/mean_length": 870.8538208007812,
      "completions/mean_terminated_length": 664.0154418945312,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 4.0466472303206995,
      "grad_norm": 0.12592561542987823,
      "learning_rate": 1e-06,
      "loss": -0.015,
      "num_tokens": 258241218.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.16330133378505707,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 433
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4062.0,
      "completions/mean_length": 970.9531860351562,
      "completions/mean_terminated_length": 639.1580810546875,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 4.05597667638484,
      "grad_norm": 0.1565476655960083,
      "learning_rate": 1e-06,
      "loss": -0.0586,
      "num_tokens": 258858808.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.24100124835968018,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161845445632935,
      "step": 434
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.052455357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3065.0,
      "completions/mean_length": 739.1752319335938,
      "completions/mean_terminated_length": 553.3439331054688,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 4.0653061224489795,
      "grad_norm": 0.15354785323143005,
      "learning_rate": 1e-06,
      "loss": -0.0075,
      "num_tokens": 259412597.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.16600088775157928,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 435
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3878.0,
      "completions/mean_length": 891.4464721679688,
      "completions/mean_terminated_length": 611.4368896484375,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 4.07463556851312,
      "grad_norm": 0.13570523262023926,
      "learning_rate": 1e-06,
      "loss": -0.0166,
      "num_tokens": 260006725.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.17048059403896332,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 436
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3185.0,
      "completions/mean_length": 911.8248291015625,
      "completions/mean_terminated_length": 612.4578857421875,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 4.0839650145772595,
      "grad_norm": 0.1456775665283203,
      "learning_rate": 1e-06,
      "loss": -0.0405,
      "num_tokens": 260597984.0,
      "reward": 0.5703125,
      "reward_std": 0.1639414280653,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 437
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3499.0,
      "completions/mean_length": 956.8460083007812,
      "completions/mean_terminated_length": 682.5509643554688,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 4.093294460641399,
      "grad_norm": 0.1299942433834076,
      "learning_rate": 1e-06,
      "loss": -0.0383,
      "num_tokens": 261241614.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.18749207258224487,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924396276473999,
      "step": 438
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3715.0,
      "completions/mean_length": 933.3694458007812,
      "completions/mean_terminated_length": 652.844482421875,
      "completions/min_length": 91.0,
      "completions/min_terminated_length": 91.0,
      "epoch": 4.1026239067055394,
      "grad_norm": 0.143808975815773,
      "learning_rate": 1e-06,
      "loss": -0.0022,
      "num_tokens": 261869505.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.17822733521461487,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 439
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2522.0,
      "completions/mean_length": 869.5736694335938,
      "completions/mean_terminated_length": 678.8865356445312,
      "completions/min_length": 185.0,
      "completions/min_terminated_length": 185.0,
      "epoch": 4.111953352769679,
      "grad_norm": 0.12110484391450882,
      "learning_rate": 1e-06,
      "loss": -0.0054,
      "num_tokens": 262525931.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.17404848337173462,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689778685569763,
      "step": 440
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3422.0,
      "completions/mean_length": 889.1685791015625,
      "completions/mean_terminated_length": 659.0131225585938,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 4.121282798833819,
      "grad_norm": 0.12871553003787994,
      "learning_rate": 1e-06,
      "loss": -0.0113,
      "num_tokens": 263160586.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.17175164818763733,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 441
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3879.0,
      "completions/mean_length": 875.021240234375,
      "completions/mean_terminated_length": 664.3745727539062,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 4.130612244897959,
      "grad_norm": 0.14722591638565063,
      "learning_rate": 1e-06,
      "loss": -0.0157,
      "num_tokens": 263809621.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.18385820090770721,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 442
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0725446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3830.0,
      "completions/mean_length": 875.8939819335938,
      "completions/mean_terminated_length": 624.0204467773438,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 4.139941690962099,
      "grad_norm": 0.12880070507526398,
      "learning_rate": 1e-06,
      "loss": -0.0562,
      "num_tokens": 264426542.0,
      "reward": 0.6015625,
      "reward_std": 0.1623249500989914,
      "rewards/verify_math_reward/mean": 0.6015625,
      "rewards/verify_math_reward/std": 0.48984986543655396,
      "step": 443
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2907.0,
      "completions/mean_length": 849.6964721679688,
      "completions/mean_terminated_length": 599.9807739257812,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 4.149271137026239,
      "grad_norm": 0.1354241520166397,
      "learning_rate": 1e-06,
      "loss": -0.0183,
      "num_tokens": 265022830.0,
      "reward": 0.645089328289032,
      "reward_std": 0.15887397527694702,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 444
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3499.0,
      "completions/mean_length": 862.200927734375,
      "completions/mean_terminated_length": 600.8444213867188,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 4.158600583090379,
      "grad_norm": 0.14667780697345734,
      "learning_rate": 1e-06,
      "loss": -0.0144,
      "num_tokens": 265611938.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.17171911895275116,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 445
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.056919642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3973.0,
      "completions/mean_length": 830.2857666015625,
      "completions/mean_terminated_length": 633.1834106445312,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 4.167930029154519,
      "grad_norm": 0.1396162509918213,
      "learning_rate": 1e-06,
      "loss": -0.0254,
      "num_tokens": 266233002.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.18539658188819885,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 446
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3792.0,
      "completions/mean_length": 939.3381958007812,
      "completions/mean_terminated_length": 638.3362426757812,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 4.1772594752186585,
      "grad_norm": 0.13883663713932037,
      "learning_rate": 1e-06,
      "loss": -0.021,
      "num_tokens": 266845569.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.1640167087316513,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 447
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2600.0,
      "completions/mean_length": 847.0703735351562,
      "completions/mean_terminated_length": 613.8934936523438,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 4.186588921282799,
      "grad_norm": 0.1400126814842224,
      "learning_rate": 1e-06,
      "loss": -0.0264,
      "num_tokens": 267454888.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.17600379884243011,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 448
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3421.0,
      "completions/mean_length": 908.8516235351562,
      "completions/mean_terminated_length": 621.9306640625,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 4.1959183673469385,
      "grad_norm": 0.13154536485671997,
      "learning_rate": 1e-06,
      "loss": -0.0478,
      "num_tokens": 268055235.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.17299722135066986,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 449
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3038.0,
      "completions/mean_length": 870.099365234375,
      "completions/mean_terminated_length": 600.94921875,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 4.205247813411079,
      "grad_norm": 0.15522713959217072,
      "learning_rate": 1e-06,
      "loss": -0.0086,
      "num_tokens": 268640556.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.18170854449272156,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 450
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.046875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3886.0,
      "completions/mean_length": 818.6127319335938,
      "completions/mean_terminated_length": 657.4296875,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 4.214577259475218,
      "grad_norm": 0.16115118563175201,
      "learning_rate": 1e-06,
      "loss": -0.0335,
      "num_tokens": 269293521.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.17862850427627563,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 451
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3746.0,
      "completions/mean_length": 829.3225708007812,
      "completions/mean_terminated_length": 599.054931640625,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 4.223906705539359,
      "grad_norm": 0.126956507563591,
      "learning_rate": 1e-06,
      "loss": -0.0266,
      "num_tokens": 269881898.0,
      "reward": 0.613839328289032,
      "reward_std": 0.1487216353416443,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 452
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3857.0,
      "completions/mean_length": 916.2991333007812,
      "completions/mean_terminated_length": 688.0908813476562,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 4.233236151603498,
      "grad_norm": 0.15128618478775024,
      "learning_rate": 1e-06,
      "loss": -0.0144,
      "num_tokens": 270551022.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.2105737328529358,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 453
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2793.0,
      "completions/mean_length": 851.7913208007812,
      "completions/mean_terminated_length": 627.2518310546875,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 4.242565597667639,
      "grad_norm": 0.1338435858488083,
      "learning_rate": 1e-06,
      "loss": -0.0384,
      "num_tokens": 271173427.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.18414919078350067,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 454
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3159.0,
      "completions/mean_length": 858.3795166015625,
      "completions/mean_terminated_length": 642.5381469726562,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 4.251895043731778,
      "grad_norm": 0.14865481853485107,
      "learning_rate": 1e-06,
      "loss": -0.0194,
      "num_tokens": 271798143.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.18746885657310486,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 455
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3802.0,
      "completions/mean_length": 878.0256958007812,
      "completions/mean_terminated_length": 613.74755859375,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 4.261224489795918,
      "grad_norm": 0.12724542617797852,
      "learning_rate": 1e-06,
      "loss": -0.0332,
      "num_tokens": 272391158.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.1444704234600067,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 456
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3373.0,
      "completions/mean_length": 897.0435791015625,
      "completions/mean_terminated_length": 630.1414794921875,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 4.270553935860058,
      "grad_norm": 0.13616903126239777,
      "learning_rate": 1e-06,
      "loss": -0.0444,
      "num_tokens": 272997917.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.17732398211956024,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791128396987915,
      "step": 457
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4093.0,
      "completions/mean_length": 897.5045166015625,
      "completions/mean_terminated_length": 663.8419189453125,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 4.279883381924198,
      "grad_norm": 0.16025783121585846,
      "learning_rate": 1e-06,
      "loss": 0.0052,
      "num_tokens": 273637897.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.17649208009243011,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 458
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4088.0,
      "completions/mean_length": 910.2344360351562,
      "completions/mean_terminated_length": 640.2542724609375,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 4.289212827988338,
      "grad_norm": 0.13533426821231842,
      "learning_rate": 1e-06,
      "loss": -0.0204,
      "num_tokens": 274255523.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.15349668264389038,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 459
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3380.0,
      "completions/mean_length": 970.6027221679688,
      "completions/mean_terminated_length": 685.0913696289062,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 4.298542274052478,
      "grad_norm": 0.14569327235221863,
      "learning_rate": 1e-06,
      "loss": -0.0348,
      "num_tokens": 274918207.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.20147305727005005,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321892857551575,
      "step": 460
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3801.0,
      "completions/mean_length": 906.9654541015625,
      "completions/mean_terminated_length": 673.9940185546875,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 4.307871720116618,
      "grad_norm": 0.16724221408367157,
      "learning_rate": 1e-06,
      "loss": -0.0306,
      "num_tokens": 275572344.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.20227426290512085,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 461
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3826.0,
      "completions/mean_length": 835.8058471679688,
      "completions/mean_terminated_length": 593.4412841796875,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 4.317201166180758,
      "grad_norm": 0.12606185674667358,
      "learning_rate": 1e-06,
      "loss": -0.0281,
      "num_tokens": 276161930.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.12569020688533783,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.4664256274700165,
      "step": 462
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3984.0,
      "completions/mean_length": 906.0625610351562,
      "completions/mean_terminated_length": 601.8875732421875,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 4.326530612244898,
      "grad_norm": 0.1581316590309143,
      "learning_rate": 1e-06,
      "loss": -0.0299,
      "num_tokens": 276744802.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.17735928297042847,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263837933540344,
      "step": 463
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3549.0,
      "completions/mean_length": 858.4408569335938,
      "completions/mean_terminated_length": 630.2257690429688,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 4.335860058309038,
      "grad_norm": 0.12013786286115646,
      "learning_rate": 1e-06,
      "loss": -0.0447,
      "num_tokens": 277359117.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.15521912276744843,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 464
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3948.0,
      "completions/mean_length": 772.7299194335938,
      "completions/mean_terminated_length": 576.3191528320312,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 4.345189504373177,
      "grad_norm": 0.13454978168010712,
      "learning_rate": 1e-06,
      "loss": -0.0133,
      "num_tokens": 277933627.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.1457599252462387,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.46100425720214844,
      "step": 465
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3881.0,
      "completions/mean_length": 824.3705444335938,
      "completions/mean_terminated_length": 610.411376953125,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 4.354518950437318,
      "grad_norm": 0.13409213721752167,
      "learning_rate": 1e-06,
      "loss": -0.0177,
      "num_tokens": 278534631.0,
      "reward": 0.7042410969734192,
      "reward_std": 0.15631456673145294,
      "rewards/verify_math_reward/mean": 0.7042410969734192,
      "rewards/verify_math_reward/std": 0.45663803815841675,
      "step": 466
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3801.0,
      "completions/mean_length": 825.2745971679688,
      "completions/mean_terminated_length": 586.3353271484375,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 4.363848396501457,
      "grad_norm": 0.11728842556476593,
      "learning_rate": 1e-06,
      "loss": -0.0349,
      "num_tokens": 279111685.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.13947828114032745,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111400604248,
      "step": 467
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0636160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4033.0,
      "completions/mean_length": 842.5625610351562,
      "completions/mean_terminated_length": 621.5303955078125,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 4.373177842565598,
      "grad_norm": 0.12964990735054016,
      "learning_rate": 1e-06,
      "loss": -0.0382,
      "num_tokens": 279718773.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.14628027379512787,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 468
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3740.0,
      "completions/mean_length": 862.4855346679688,
      "completions/mean_terminated_length": 651.01904296875,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 4.382507288629737,
      "grad_norm": 0.1491265892982483,
      "learning_rate": 1e-06,
      "loss": -0.033,
      "num_tokens": 280356200.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.18385820090770721,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 469
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3143.0,
      "completions/mean_length": 939.2310791015625,
      "completions/mean_terminated_length": 667.5575561523438,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 4.391836734693878,
      "grad_norm": 0.15070927143096924,
      "learning_rate": 1e-06,
      "loss": -0.0101,
      "num_tokens": 281002951.0,
      "reward": 0.5625,
      "reward_std": 0.17544110119342804,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 470
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4080.0,
      "completions/mean_length": 888.2835083007812,
      "completions/mean_terminated_length": 653.9473266601562,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 4.401166180758017,
      "grad_norm": 0.13083180785179138,
      "learning_rate": 1e-06,
      "loss": -0.0109,
      "num_tokens": 281646629.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.16506867110729218,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 471
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0535714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4055.0,
      "completions/mean_length": 801.9420166015625,
      "completions/mean_terminated_length": 615.48583984375,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 4.410495626822158,
      "grad_norm": 0.13732460141181946,
      "learning_rate": 1e-06,
      "loss": -0.0055,
      "num_tokens": 282260009.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.16326673328876495,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 472
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2586.0,
      "completions/mean_length": 903.263427734375,
      "completions/mean_terminated_length": 594.541015625,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 4.419825072886297,
      "grad_norm": 0.16347962617874146,
      "learning_rate": 1e-06,
      "loss": -0.0228,
      "num_tokens": 282837573.0,
      "reward": 0.621651828289032,
      "reward_std": 0.20421750843524933,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 473
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0636160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3732.0,
      "completions/mean_length": 858.0502319335938,
      "completions/mean_terminated_length": 638.0703125,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 4.429154518950437,
      "grad_norm": 0.14548324048519135,
      "learning_rate": 1e-06,
      "loss": -0.0393,
      "num_tokens": 283463882.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.19415000081062317,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 474
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3960.0,
      "completions/mean_length": 982.5480346679688,
      "completions/mean_terminated_length": 656.2305908203125,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 4.438483965014577,
      "grad_norm": 0.12172096967697144,
      "learning_rate": 1e-06,
      "loss": -0.0221,
      "num_tokens": 284087333.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.1447400450706482,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 475
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0546875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3045.0,
      "completions/mean_length": 793.3761596679688,
      "completions/mean_terminated_length": 602.315185546875,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 4.447813411078717,
      "grad_norm": 0.14543366432189941,
      "learning_rate": 1e-06,
      "loss": -0.0257,
      "num_tokens": 284686502.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.16476556658744812,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692258834839,
      "step": 476
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2731.0,
      "completions/mean_length": 817.685302734375,
      "completions/mean_terminated_length": 590.7852172851562,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 4.457142857142857,
      "grad_norm": 0.15591642260551453,
      "learning_rate": 1e-06,
      "loss": -0.0229,
      "num_tokens": 285267420.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.18889102339744568,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 477
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3399.0,
      "completions/mean_length": 852.4230346679688,
      "completions/mean_terminated_length": 660.7222290039062,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 4.466472303206997,
      "grad_norm": 0.13825039565563202,
      "learning_rate": 1e-06,
      "loss": -0.0378,
      "num_tokens": 285913887.0,
      "reward": 0.637276828289032,
      "reward_std": 0.18907366693019867,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 478
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3001.0,
      "completions/mean_length": 954.0100708007812,
      "completions/mean_terminated_length": 607.4956665039062,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 4.475801749271137,
      "grad_norm": 0.1647224873304367,
      "learning_rate": 1e-06,
      "loss": -0.0647,
      "num_tokens": 286494920.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.19554077088832855,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 479
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4022.0,
      "completions/mean_length": 981.6897583007812,
      "completions/mean_terminated_length": 646.7762451171875,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 4.485131195335277,
      "grad_norm": 0.13880734145641327,
      "learning_rate": 1e-06,
      "loss": -0.0264,
      "num_tokens": 287111986.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.16450344026088715,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 480
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3929.0,
      "completions/mean_length": 876.583740234375,
      "completions/mean_terminated_length": 569.5978393554688,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 4.494460641399417,
      "grad_norm": 0.14553415775299072,
      "learning_rate": 1e-06,
      "loss": -0.0336,
      "num_tokens": 287664693.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.16404810547828674,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 481
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3793.0,
      "completions/mean_length": 947.739990234375,
      "completions/mean_terminated_length": 617.7743530273438,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 4.503790087463557,
      "grad_norm": 0.13704046607017517,
      "learning_rate": 1e-06,
      "loss": -0.029,
      "num_tokens": 288257948.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.15980690717697144,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 482
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2332.0,
      "completions/mean_length": 817.3672485351562,
      "completions/mean_terminated_length": 548.1074829101562,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 4.513119533527696,
      "grad_norm": 0.13461345434188843,
      "learning_rate": 1e-06,
      "loss": -0.0355,
      "num_tokens": 288791949.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.14098599553108215,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 483
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3147.0,
      "completions/mean_length": 880.9676513671875,
      "completions/mean_terminated_length": 658.447509765625,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 4.522448979591837,
      "grad_norm": 0.13937199115753174,
      "learning_rate": 1e-06,
      "loss": -0.0133,
      "num_tokens": 289426024.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.16619662940502167,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 484
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3191.0,
      "completions/mean_length": 985.5792846679688,
      "completions/mean_terminated_length": 655.3370361328125,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 4.531778425655976,
      "grad_norm": 0.14126333594322205,
      "learning_rate": 1e-06,
      "loss": -0.0281,
      "num_tokens": 290049727.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.14748378098011017,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 485
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0390625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2850.0,
      "completions/mean_length": 743.3895263671875,
      "completions/mean_terminated_length": 607.1044921875,
      "completions/min_length": 186.0,
      "completions/min_terminated_length": 186.0,
      "epoch": 4.541107871720117,
      "grad_norm": 0.13064506649971008,
      "learning_rate": 1e-06,
      "loss": 0.0008,
      "num_tokens": 290650796.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.13805679976940155,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 486
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3197.0,
      "completions/mean_length": 880.872802734375,
      "completions/mean_terminated_length": 654.2389526367188,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 4.550437317784256,
      "grad_norm": 0.13167732954025269,
      "learning_rate": 1e-06,
      "loss": -0.037,
      "num_tokens": 291284402.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.16728220880031586,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 487
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3039.0,
      "completions/mean_length": 811.8381958007812,
      "completions/mean_terminated_length": 580.3380737304688,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 4.559766763848397,
      "grad_norm": 0.15319648385047913,
      "learning_rate": 1e-06,
      "loss": -0.0087,
      "num_tokens": 291856041.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.17818161845207214,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613667368888855,
      "step": 488
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4078.0,
      "completions/mean_length": 1012.21435546875,
      "completions/mean_terminated_length": 629.1593627929688,
      "completions/min_length": 81.0,
      "completions/min_terminated_length": 81.0,
      "epoch": 4.569096209912536,
      "grad_norm": 0.142557293176651,
      "learning_rate": 1e-06,
      "loss": -0.0411,
      "num_tokens": 292450921.0,
      "reward": 0.6171875,
      "reward_std": 0.17205290496349335,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 489
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2693.0,
      "completions/mean_length": 946.9230346679688,
      "completions/mean_terminated_length": 650.85595703125,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 4.578425655976677,
      "grad_norm": 0.14201919734477997,
      "learning_rate": 1e-06,
      "loss": -0.04,
      "num_tokens": 293081524.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.19776137173175812,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 490
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0580357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2370.0,
      "completions/mean_length": 807.294677734375,
      "completions/mean_terminated_length": 604.6730346679688,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 4.587755102040816,
      "grad_norm": 0.13329870998859406,
      "learning_rate": 1e-06,
      "loss": -0.0107,
      "num_tokens": 293677708.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.1462477743625641,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 491
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0580357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3301.0,
      "completions/mean_length": 820.8638916015625,
      "completions/mean_terminated_length": 619.0782470703125,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 4.597084548104956,
      "grad_norm": 0.1489163190126419,
      "learning_rate": 1e-06,
      "loss": -0.022,
      "num_tokens": 294288514.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.19122150540351868,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.47942501306533813,
      "step": 492
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2537.0,
      "completions/mean_length": 900.7120971679688,
      "completions/mean_terminated_length": 608.8161010742188,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 4.606413994169096,
      "grad_norm": 0.15610577166080475,
      "learning_rate": 1e-06,
      "loss": -0.0386,
      "num_tokens": 294874144.0,
      "reward": 0.582589328289032,
      "reward_std": 0.1680738478899002,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.493407279253006,
      "step": 493
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4082.0,
      "completions/mean_length": 853.8560791015625,
      "completions/mean_terminated_length": 625.3178100585938,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 4.615743440233236,
      "grad_norm": 0.13953043520450592,
      "learning_rate": 1e-06,
      "loss": -0.0571,
      "num_tokens": 295476919.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.18077494204044342,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 494
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4072.0,
      "completions/mean_length": 905.6261596679688,
      "completions/mean_terminated_length": 592.8443603515625,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 4.625072886297376,
      "grad_norm": 0.14395247399806976,
      "learning_rate": 1e-06,
      "loss": -0.0533,
      "num_tokens": 296046312.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.17510268092155457,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 495
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2773.0,
      "completions/mean_length": 977.607177734375,
      "completions/mean_terminated_length": 616.4483032226562,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 4.634402332361516,
      "grad_norm": 0.1408366560935974,
      "learning_rate": 1e-06,
      "loss": -0.0168,
      "num_tokens": 296633080.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.17592641711235046,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 496
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0345982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4043.0,
      "completions/mean_length": 715.794677734375,
      "completions/mean_terminated_length": 594.654296875,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 4.643731778425656,
      "grad_norm": 0.12047068029642105,
      "learning_rate": 1e-06,
      "loss": -0.0206,
      "num_tokens": 297230288.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.1327543556690216,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.462861567735672,
      "step": 497
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3825.0,
      "completions/mean_length": 962.5569458007812,
      "completions/mean_terminated_length": 638.4076538085938,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 4.653061224489796,
      "grad_norm": 0.13730822503566742,
      "learning_rate": 1e-06,
      "loss": -0.0552,
      "num_tokens": 297838211.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.1579635739326477,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 498
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3474.0,
      "completions/mean_length": 886.5357666015625,
      "completions/mean_terminated_length": 610.3272705078125,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 4.662390670553936,
      "grad_norm": 0.15910038352012634,
      "learning_rate": 1e-06,
      "loss": -0.0318,
      "num_tokens": 298432763.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.19554010033607483,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 499
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3054.0,
      "completions/mean_length": 845.0078735351562,
      "completions/mean_terminated_length": 632.3983154296875,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 4.671720116618076,
      "grad_norm": 0.14126543700695038,
      "learning_rate": 1e-06,
      "loss": -0.0309,
      "num_tokens": 299055754.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.17494861781597137,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 500
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2202.0,
      "completions/mean_length": 889.982177734375,
      "completions/mean_terminated_length": 635.0457763671875,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 4.681049562682215,
      "grad_norm": 0.1432536244392395,
      "learning_rate": 1e-06,
      "loss": -0.0299,
      "num_tokens": 299678042.0,
      "reward": 0.6171875,
      "reward_std": 0.1544705480337143,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 501
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3824.0,
      "completions/mean_length": 939.3839721679688,
      "completions/mean_terminated_length": 659.3925170898438,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 4.690379008746356,
      "grad_norm": 0.14592066407203674,
      "learning_rate": 1e-06,
      "loss": -0.034,
      "num_tokens": 300319298.0,
      "reward": 0.5703125,
      "reward_std": 0.17999425530433655,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 502
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3352.0,
      "completions/mean_length": 951.3292846679688,
      "completions/mean_terminated_length": 600.1873168945312,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 4.699708454810495,
      "grad_norm": 0.15302255749702454,
      "learning_rate": 1e-06,
      "loss": -0.048,
      "num_tokens": 300897329.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.19294606149196625,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 503
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2738.0,
      "completions/mean_length": 968.7600708007812,
      "completions/mean_terminated_length": 662.1679077148438,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 4.709037900874636,
      "grad_norm": 0.13722142577171326,
      "learning_rate": 1e-06,
      "loss": -0.0316,
      "num_tokens": 301529250.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.15699605643749237,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 504
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3700.0,
      "completions/mean_length": 903.2154541015625,
      "completions/mean_terminated_length": 641.0060424804688,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 4.718367346938775,
      "grad_norm": 0.1396508514881134,
      "learning_rate": 1e-06,
      "loss": -0.0216,
      "num_tokens": 302154771.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.16686920821666718,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 505
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2350.0,
      "completions/mean_length": 935.7332763671875,
      "completions/mean_terminated_length": 638.6141967773438,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 4.727696793002916,
      "grad_norm": 0.14874345064163208,
      "learning_rate": 1e-06,
      "loss": -0.0444,
      "num_tokens": 302767308.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.15905873477458954,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924395978450775,
      "step": 506
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0636160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3552.0,
      "completions/mean_length": 817.3080444335938,
      "completions/mean_terminated_length": 594.5601806640625,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 4.737026239067055,
      "grad_norm": 0.14757046103477478,
      "learning_rate": 1e-06,
      "loss": -0.0262,
      "num_tokens": 303364256.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.1788567453622818,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219157218933105,
      "step": 507
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4059.0,
      "completions/mean_length": 994.0245971679688,
      "completions/mean_terminated_length": 681.54052734375,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 4.746355685131196,
      "grad_norm": 0.13975664973258972,
      "learning_rate": 1e-06,
      "loss": -0.0403,
      "num_tokens": 304013742.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.1728488951921463,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 508
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3845.0,
      "completions/mean_length": 857.404052734375,
      "completions/mean_terminated_length": 608.28125,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 4.755685131195335,
      "grad_norm": 0.14102959632873535,
      "learning_rate": 1e-06,
      "loss": 0.0005,
      "num_tokens": 304613456.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.14094644784927368,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 509
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3494.0,
      "completions/mean_length": 815.8225708007812,
      "completions/mean_terminated_length": 621.9586181640625,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 4.765014577259475,
      "grad_norm": 0.12711749970912933,
      "learning_rate": 1e-06,
      "loss": -0.039,
      "num_tokens": 305224233.0,
      "reward": 0.723214328289032,
      "reward_std": 0.16386404633522034,
      "rewards/verify_math_reward/mean": 0.7232142686843872,
      "rewards/verify_math_reward/std": 0.44765952229499817,
      "step": 510
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3817.0,
      "completions/mean_length": 955.30810546875,
      "completions/mean_terminated_length": 660.0293579101562,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 4.774344023323615,
      "grad_norm": 0.14884766936302185,
      "learning_rate": 1e-06,
      "loss": -0.0012,
      "num_tokens": 305867781.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.19392429292201996,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.48841193318367004,
      "step": 511
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3730.0,
      "completions/mean_length": 818.5111694335938,
      "completions/mean_terminated_length": 570.6338500976562,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 4.783673469387755,
      "grad_norm": 0.13729892671108246,
      "learning_rate": 1e-06,
      "loss": -0.02,
      "num_tokens": 306428167.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.1368863582611084,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 512
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3931.0,
      "completions/mean_length": 859.0357666015625,
      "completions/mean_terminated_length": 626.7176513671875,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 4.793002915451895,
      "grad_norm": 0.13870447874069214,
      "learning_rate": 1e-06,
      "loss": -0.0396,
      "num_tokens": 307044415.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.16848501563072205,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.4907552897930145,
      "step": 513
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3639.0,
      "completions/mean_length": 966.08935546875,
      "completions/mean_terminated_length": 620.9071044921875,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 4.802332361516035,
      "grad_norm": 0.15466636419296265,
      "learning_rate": 1e-06,
      "loss": -0.0106,
      "num_tokens": 307626375.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.1811119168996811,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 514
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3488.0,
      "completions/mean_length": 966.2667846679688,
      "completions/mean_terminated_length": 650.9864501953125,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 4.811661807580175,
      "grad_norm": 0.15332716703414917,
      "learning_rate": 1e-06,
      "loss": -0.0013,
      "num_tokens": 308253054.0,
      "reward": 0.582589328289032,
      "reward_std": 0.176600843667984,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.493407279253006,
      "step": 515
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3642.0,
      "completions/mean_length": 978.966552734375,
      "completions/mean_terminated_length": 639.4876098632812,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 4.820991253644315,
      "grad_norm": 0.1658165603876114,
      "learning_rate": 1e-06,
      "loss": -0.0435,
      "num_tokens": 308863856.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.2142515331506729,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 516
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4075.0,
      "completions/mean_length": 874.7545166015625,
      "completions/mean_terminated_length": 643.5645751953125,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 4.830320699708455,
      "grad_norm": 0.14621497690677643,
      "learning_rate": 1e-06,
      "loss": -0.0209,
      "num_tokens": 309490748.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.20308955013751984,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.48841196298599243,
      "step": 517
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3166.0,
      "completions/mean_length": 932.0703735351562,
      "completions/mean_terminated_length": 630.3753051757812,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 4.839650145772595,
      "grad_norm": 0.16321462392807007,
      "learning_rate": 1e-06,
      "loss": -0.0138,
      "num_tokens": 310088707.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.14102061092853546,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 518
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3853.0,
      "completions/mean_length": 983.9944458007812,
      "completions/mean_terminated_length": 636.5,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 4.848979591836734,
      "grad_norm": 0.1474456787109375,
      "learning_rate": 1e-06,
      "loss": -0.0406,
      "num_tokens": 310684830.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.14394910633563995,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 519
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3515.0,
      "completions/mean_length": 954.6317138671875,
      "completions/mean_terminated_length": 655.0880737304688,
      "completions/min_length": 189.0,
      "completions/min_terminated_length": 189.0,
      "epoch": 4.858309037900875,
      "grad_norm": 0.12145557254552841,
      "learning_rate": 1e-06,
      "loss": -0.0311,
      "num_tokens": 311308524.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.15033601224422455,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 520
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3251.0,
      "completions/mean_length": 856.9922485351562,
      "completions/mean_terminated_length": 632.8126831054688,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 4.867638483965014,
      "grad_norm": 0.13548052310943604,
      "learning_rate": 1e-06,
      "loss": -0.034,
      "num_tokens": 311931709.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.16326813399791718,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 521
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2178.0,
      "completions/mean_length": 810.9475708007812,
      "completions/mean_terminated_length": 562.4981689453125,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 4.876967930029155,
      "grad_norm": 0.15522019565105438,
      "learning_rate": 1e-06,
      "loss": -0.0308,
      "num_tokens": 312485238.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.16747654974460602,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 522
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3690.0,
      "completions/mean_length": 821.794677734375,
      "completions/mean_terminated_length": 628.28369140625,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 4.886297376093294,
      "grad_norm": 0.14525139331817627,
      "learning_rate": 1e-06,
      "loss": -0.0123,
      "num_tokens": 313113166.0,
      "reward": 0.613839328289032,
      "reward_std": 0.18080954253673553,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 523
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3791.0,
      "completions/mean_length": 857.7813110351562,
      "completions/mean_terminated_length": 570.5516357421875,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 4.895626822157435,
      "grad_norm": 0.13862133026123047,
      "learning_rate": 1e-06,
      "loss": -0.0359,
      "num_tokens": 313674034.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.12805670499801636,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.48291724920272827,
      "step": 524
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3119.0,
      "completions/mean_length": 1012.5089721679688,
      "completions/mean_terminated_length": 693.527099609375,
      "completions/min_length": 191.0,
      "completions/min_terminated_length": 191.0,
      "epoch": 4.904956268221574,
      "grad_norm": 0.1406036764383316,
      "learning_rate": 1e-06,
      "loss": -0.0273,
      "num_tokens": 314333354.0,
      "reward": 0.5457589626312256,
      "reward_std": 0.20298220217227936,
      "rewards/verify_math_reward/mean": 0.5457589030265808,
      "rewards/verify_math_reward/std": 0.4981797933578491,
      "step": 525
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3999.0,
      "completions/mean_length": 1040.7913818359375,
      "completions/mean_terminated_length": 643.9609375,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 4.914285714285715,
      "grad_norm": 0.1542518138885498,
      "learning_rate": 1e-06,
      "loss": -0.0497,
      "num_tokens": 314935831.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.1698743999004364,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 526
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2488.0,
      "completions/mean_length": 903.6641235351562,
      "completions/mean_terminated_length": 603.5299072265625,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 4.923615160349854,
      "grad_norm": 0.1344006061553955,
      "learning_rate": 1e-06,
      "loss": -0.0452,
      "num_tokens": 315519586.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.15120504796504974,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 527
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3122.0,
      "completions/mean_length": 942.0848388671875,
      "completions/mean_terminated_length": 620.098388671875,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 4.932944606413994,
      "grad_norm": 0.15205985307693481,
      "learning_rate": 1e-06,
      "loss": -0.0474,
      "num_tokens": 316112758.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.13940368592739105,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 528
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0591517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3746.0,
      "completions/mean_length": 816.1808471679688,
      "completions/mean_terminated_length": 609.976318359375,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 4.942274052478134,
      "grad_norm": 0.14302214980125427,
      "learning_rate": 1e-06,
      "loss": -0.0214,
      "num_tokens": 316707080.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.177130326628685,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 529
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2597.0,
      "completions/mean_length": 863.3404541015625,
      "completions/mean_terminated_length": 614.67431640625,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 4.9516034985422746,
      "grad_norm": 0.15913861989974976,
      "learning_rate": 1e-06,
      "loss": -0.0164,
      "num_tokens": 317308297.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.157324880361557,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 530
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2789.0,
      "completions/mean_length": 800.0256958007812,
      "completions/mean_terminated_length": 563.4724731445312,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 4.960932944606414,
      "grad_norm": 0.12956346571445465,
      "learning_rate": 1e-06,
      "loss": -0.0388,
      "num_tokens": 317863608.0,
      "reward": 0.7299107313156128,
      "reward_std": 0.14312425255775452,
      "rewards/verify_math_reward/mean": 0.7299107313156128,
      "rewards/verify_math_reward/std": 0.44425368309020996,
      "step": 531
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3079.0,
      "completions/mean_length": 933.9152221679688,
      "completions/mean_terminated_length": 606.8029174804688,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 4.970262390670554,
      "grad_norm": 0.1410096436738968,
      "learning_rate": 1e-06,
      "loss": -0.045,
      "num_tokens": 318436020.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.17942015826702118,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485536336898804,
      "step": 532
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2926.0,
      "completions/mean_length": 890.5357666015625,
      "completions/mean_terminated_length": 618.88623046875,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 4.979591836734694,
      "grad_norm": 0.13046663999557495,
      "learning_rate": 1e-06,
      "loss": -0.0144,
      "num_tokens": 319039924.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.14714929461479187,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.48291724920272827,
      "step": 533
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3411.0,
      "completions/mean_length": 894.8928833007812,
      "completions/mean_terminated_length": 627.8114013671875,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 4.988921282798834,
      "grad_norm": 0.14756572246551514,
      "learning_rate": 1e-06,
      "loss": -0.0265,
      "num_tokens": 319651332.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.1861012876033783,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 534
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.07670454545454541,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1950.0,
      "completions/mean_length": 822.45458984375,
      "completions/mean_terminated_length": 550.4984741210938,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 4.998250728862974,
      "grad_norm": 0.1231074184179306,
      "learning_rate": 1e-06,
      "loss": -0.0154,
      "num_tokens": 320228151.0,
      "reward": 0.625,
      "reward_std": 0.11835899204015732,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 535
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3135.0,
      "completions/mean_length": 858.0636596679688,
      "completions/mean_terminated_length": 592.1461181640625,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 5.0093294460641395,
      "grad_norm": 0.13598649203777313,
      "learning_rate": 1e-06,
      "loss": -0.0272,
      "num_tokens": 320796040.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.13519500195980072,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422144770622253,
      "step": 536
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3349.0,
      "completions/mean_length": 936.01904296875,
      "completions/mean_terminated_length": 630.4639282226562,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 5.01865889212828,
      "grad_norm": 0.13071931898593903,
      "learning_rate": 1e-06,
      "loss": -0.0181,
      "num_tokens": 321392849.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.14304685592651367,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321892857551575,
      "step": 537
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3047.0,
      "completions/mean_length": 1017.6016235351562,
      "completions/mean_terminated_length": 661.07470703125,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 5.0279883381924195,
      "grad_norm": 0.14592930674552917,
      "learning_rate": 1e-06,
      "loss": -0.0498,
      "num_tokens": 322014052.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.18532103300094604,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 538
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2656.0,
      "completions/mean_length": 851.3750610351562,
      "completions/mean_terminated_length": 593.3687133789062,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 5.03731778425656,
      "grad_norm": 0.1583303064107895,
      "learning_rate": 1e-06,
      "loss": -0.022,
      "num_tokens": 322599156.0,
      "reward": 0.621651828289032,
      "reward_std": 0.15417632460594177,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 539
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4071.0,
      "completions/mean_length": 847.8739013671875,
      "completions/mean_terminated_length": 602.21728515625,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 5.0466472303206995,
      "grad_norm": 0.15164095163345337,
      "learning_rate": 1e-06,
      "loss": -0.0335,
      "num_tokens": 323183915.0,
      "reward": 0.7008928656578064,
      "reward_std": 0.1834784299135208,
      "rewards/verify_math_reward/mean": 0.7008928656578064,
      "rewards/verify_math_reward/std": 0.4581226110458374,
      "step": 540
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3451.0,
      "completions/mean_length": 839.5145263671875,
      "completions/mean_terminated_length": 597.4256591796875,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 5.05597667638484,
      "grad_norm": 0.1190546452999115,
      "learning_rate": 1e-06,
      "loss": -0.0257,
      "num_tokens": 323765096.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.13275323808193207,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 541
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0613839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4063.0,
      "completions/mean_length": 836.075927734375,
      "completions/mean_terminated_length": 622.8822631835938,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 5.0653061224489795,
      "grad_norm": 0.1438395380973816,
      "learning_rate": 1e-06,
      "loss": -0.0128,
      "num_tokens": 324371476.0,
      "reward": 0.6484375,
      "reward_std": 0.1672803908586502,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 542
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4044.0,
      "completions/mean_length": 838.6585083007812,
      "completions/mean_terminated_length": 596.5059814453125,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 5.07463556851312,
      "grad_norm": 0.1270408034324646,
      "learning_rate": 1e-06,
      "loss": -0.013,
      "num_tokens": 324956410.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.1478532999753952,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 543
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3538.0,
      "completions/mean_length": 867.8594360351562,
      "completions/mean_terminated_length": 594.2881469726562,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 5.0839650145772595,
      "grad_norm": 0.13325980305671692,
      "learning_rate": 1e-06,
      "loss": -0.0223,
      "num_tokens": 325540796.0,
      "reward": 0.6484375,
      "reward_std": 0.13804681599140167,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 544
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3920.0,
      "completions/mean_length": 973.5379638671875,
      "completions/mean_terminated_length": 637.747802734375,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 5.093294460641399,
      "grad_norm": 0.12674620747566223,
      "learning_rate": 1e-06,
      "loss": -0.0362,
      "num_tokens": 326155990.0,
      "reward": 0.543526828289032,
      "reward_std": 0.14628097414970398,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 545
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3766.0,
      "completions/mean_length": 826.5234985351562,
      "completions/mean_terminated_length": 566.5409545898438,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 5.1026239067055394,
      "grad_norm": 0.14388048648834229,
      "learning_rate": 1e-06,
      "loss": -0.0136,
      "num_tokens": 326709187.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.14064082503318787,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219160199165344,
      "step": 546
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4052.0,
      "completions/mean_length": 839.1495971679688,
      "completions/mean_terminated_length": 605.404296875,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 5.111953352769679,
      "grad_norm": 0.1343618780374527,
      "learning_rate": 1e-06,
      "loss": -0.04,
      "num_tokens": 327305321.0,
      "reward": 0.676339328289032,
      "reward_std": 0.1538725346326828,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335985660553,
      "step": 547
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.060267857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3394.0,
      "completions/mean_length": 786.8739013671875,
      "completions/mean_terminated_length": 574.649658203125,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 5.121282798833819,
      "grad_norm": 0.14267843961715698,
      "learning_rate": 1e-06,
      "loss": -0.0185,
      "num_tokens": 327871144.0,
      "reward": 0.7243303656578064,
      "reward_std": 0.13839450478553772,
      "rewards/verify_math_reward/mean": 0.7243303656578064,
      "rewards/verify_math_reward/std": 0.4471006691455841,
      "step": 548
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3624.0,
      "completions/mean_length": 859.0814819335938,
      "completions/mean_terminated_length": 643.2869262695312,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 5.130612244897959,
      "grad_norm": 0.12819762527942657,
      "learning_rate": 1e-06,
      "loss": -0.0339,
      "num_tokens": 328502497.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.15105168521404266,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 549
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3505.0,
      "completions/mean_length": 967.5535888671875,
      "completions/mean_terminated_length": 613.903076171875,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 5.139941690962099,
      "grad_norm": 0.14108048379421234,
      "learning_rate": 1e-06,
      "loss": -0.034,
      "num_tokens": 329095945.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.1646895855665207,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 550
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4018.0,
      "completions/mean_length": 845.7891235351562,
      "completions/mean_terminated_length": 578.863525390625,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 5.149271137026239,
      "grad_norm": 0.13589175045490265,
      "learning_rate": 1e-06,
      "loss": -0.0216,
      "num_tokens": 329663580.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.12967249751091003,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 551
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3706.0,
      "completions/mean_length": 881.1920166015625,
      "completions/mean_terminated_length": 666.8714599609375,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 5.158600583090379,
      "grad_norm": 0.1442086398601532,
      "learning_rate": 1e-06,
      "loss": -0.0559,
      "num_tokens": 330311032.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.18426865339279175,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 552
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0636160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3465.0,
      "completions/mean_length": 824.1585083007812,
      "completions/mean_terminated_length": 601.8760375976562,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 5.167930029154519,
      "grad_norm": 0.13595373928546906,
      "learning_rate": 1e-06,
      "loss": -0.0322,
      "num_tokens": 330902230.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.1429395079612732,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 553
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3057.0,
      "completions/mean_length": 971.66748046875,
      "completions/mean_terminated_length": 644.2095947265625,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 5.1772594752186585,
      "grad_norm": 0.12988536059856415,
      "learning_rate": 1e-06,
      "loss": -0.0419,
      "num_tokens": 331512100.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.15266859531402588,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791128396987915,
      "step": 554
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2589.0,
      "completions/mean_length": 908.65966796875,
      "completions/mean_terminated_length": 630.1541137695312,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 5.186588921282799,
      "grad_norm": 0.15173228085041046,
      "learning_rate": 1e-06,
      "loss": -0.0365,
      "num_tokens": 332117251.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.1817852407693863,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 555
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.056919642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3508.0,
      "completions/mean_length": 795.2188110351562,
      "completions/mean_terminated_length": 596.0,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 5.1959183673469385,
      "grad_norm": 0.14987683296203613,
      "learning_rate": 1e-06,
      "loss": -0.039,
      "num_tokens": 332711983.0,
      "reward": 0.6953125596046448,
      "reward_std": 0.18096107244491577,
      "rewards/verify_math_reward/mean": 0.6953125,
      "rewards/verify_math_reward/std": 0.4605320394039154,
      "step": 556
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2917.0,
      "completions/mean_length": 995.87060546875,
      "completions/mean_terminated_length": 662.4820556640625,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 5.205247813411079,
      "grad_norm": 0.14564700424671173,
      "learning_rate": 1e-06,
      "loss": -0.0613,
      "num_tokens": 333336899.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.19107064604759216,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 557
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2628.0,
      "completions/mean_length": 945.86279296875,
      "completions/mean_terminated_length": 594.1104125976562,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 5.214577259475218,
      "grad_norm": 0.14157523214817047,
      "learning_rate": 1e-06,
      "loss": -0.0394,
      "num_tokens": 333911280.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.16686992347240448,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.48291724920272827,
      "step": 558
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3678.0,
      "completions/mean_length": 975.755615234375,
      "completions/mean_terminated_length": 623.0322875976562,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 5.223906705539359,
      "grad_norm": 0.1475507616996765,
      "learning_rate": 1e-06,
      "loss": -0.0633,
      "num_tokens": 334498133.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.18366950750350952,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 559
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2293.0,
      "completions/mean_length": 961.2678833007812,
      "completions/mean_terminated_length": 593.8554077148438,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 5.233236151603498,
      "grad_norm": 0.1454574018716812,
      "learning_rate": 1e-06,
      "loss": -0.0422,
      "num_tokens": 335064125.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.13527238368988037,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 560
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3801.0,
      "completions/mean_length": 919.1116333007812,
      "completions/mean_terminated_length": 607.6519775390625,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 5.242565597667639,
      "grad_norm": 0.13434049487113953,
      "learning_rate": 1e-06,
      "loss": -0.0304,
      "num_tokens": 335651097.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.14109477400779724,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219160199165344,
      "step": 561
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4050.0,
      "completions/mean_length": 958.59716796875,
      "completions/mean_terminated_length": 616.8997192382812,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 5.251895043731778,
      "grad_norm": 0.14526161551475525,
      "learning_rate": 1e-06,
      "loss": -0.0292,
      "num_tokens": 336234872.0,
      "reward": 0.613839328289032,
      "reward_std": 0.15867966413497925,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 562
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3176.0,
      "completions/mean_length": 942.2433471679688,
      "completions/mean_terminated_length": 611.7015991210938,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 5.261224489795918,
      "grad_norm": 0.15732093155384064,
      "learning_rate": 1e-06,
      "loss": -0.0679,
      "num_tokens": 336816234.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.19666732847690582,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 563
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3807.0,
      "completions/mean_length": 1053.8560791015625,
      "completions/mean_terminated_length": 667.3698120117188,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 5.270553935860058,
      "grad_norm": 0.14700715243816376,
      "learning_rate": 1e-06,
      "loss": -0.0676,
      "num_tokens": 337446441.0,
      "reward": 0.559151828289032,
      "reward_std": 0.19974806904792786,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 564
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3499.0,
      "completions/mean_length": 947.7522583007812,
      "completions/mean_terminated_length": 609.1890869140625,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 5.279883381924198,
      "grad_norm": 0.15658360719680786,
      "learning_rate": 1e-06,
      "loss": -0.0453,
      "num_tokens": 338030283.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.1766418069601059,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.4876568913459778,
      "step": 565
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2421.0,
      "completions/mean_length": 863.8370971679688,
      "completions/mean_terminated_length": 594.1644897460938,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 5.289212827988338,
      "grad_norm": 0.15140020847320557,
      "learning_rate": 1e-06,
      "loss": -0.0456,
      "num_tokens": 338607561.0,
      "reward": 0.6484375,
      "reward_std": 0.15916889905929565,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 566
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2559.0,
      "completions/mean_length": 953.05810546875,
      "completions/mean_terminated_length": 619.3629760742188,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 5.298542274052478,
      "grad_norm": 0.14211858808994293,
      "learning_rate": 1e-06,
      "loss": -0.0451,
      "num_tokens": 339194709.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.16044698655605316,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.4876568913459778,
      "step": 567
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3767.0,
      "completions/mean_length": 886.1897583007812,
      "completions/mean_terminated_length": 614.1719360351562,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 5.307871720116618,
      "grad_norm": 0.14483484625816345,
      "learning_rate": 1e-06,
      "loss": -0.0294,
      "num_tokens": 339788775.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.1488385647535324,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 568
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3927.0,
      "completions/mean_length": 993.4576416015625,
      "completions/mean_terminated_length": 621.1524658203125,
      "completions/min_length": 177.0,
      "completions/min_terminated_length": 177.0,
      "epoch": 5.317201166180758,
      "grad_norm": 0.12855836749076843,
      "learning_rate": 1e-06,
      "loss": -0.0427,
      "num_tokens": 340377953.0,
      "reward": 0.637276828289032,
      "reward_std": 0.1392500400543213,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 569
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2971.0,
      "completions/mean_length": 915.40185546875,
      "completions/mean_terminated_length": 612.1173706054688,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 5.326530612244898,
      "grad_norm": 0.14327600598335266,
      "learning_rate": 1e-06,
      "loss": -0.0175,
      "num_tokens": 340963633.0,
      "reward": 0.609375,
      "reward_std": 0.1397392898797989,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 570
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3925.0,
      "completions/mean_length": 932.4777221679688,
      "completions/mean_terminated_length": 622.3284301757812,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 5.335860058309038,
      "grad_norm": 0.14557726681232452,
      "learning_rate": 1e-06,
      "loss": -0.0468,
      "num_tokens": 341553677.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.17315199971199036,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 571
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2654.0,
      "completions/mean_length": 871.8303833007812,
      "completions/mean_terminated_length": 602.82470703125,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 5.345189504373177,
      "grad_norm": 0.1359301060438156,
      "learning_rate": 1e-06,
      "loss": -0.0419,
      "num_tokens": 342144901.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.157290980219841,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 572
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4062.0,
      "completions/mean_length": 871.1339721679688,
      "completions/mean_terminated_length": 593.5999755859375,
      "completions/min_length": 81.0,
      "completions/min_terminated_length": 81.0,
      "epoch": 5.354518950437318,
      "grad_norm": 0.1599980890750885,
      "learning_rate": 1e-06,
      "loss": -0.0469,
      "num_tokens": 342729197.0,
      "reward": 0.6640625,
      "reward_std": 0.17502851784229279,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 573
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2673.0,
      "completions/mean_length": 794.0379638671875,
      "completions/mean_terminated_length": 540.0408935546875,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 5.363848396501457,
      "grad_norm": 0.14998947083950043,
      "learning_rate": 1e-06,
      "loss": -0.0202,
      "num_tokens": 343269063.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.14170026779174805,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 574
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3654.0,
      "completions/mean_length": 961.2656860351562,
      "completions/mean_terminated_length": 674.9013671875,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 5.373177842565598,
      "grad_norm": 0.13110540807247162,
      "learning_rate": 1e-06,
      "loss": -0.0179,
      "num_tokens": 343915637.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.1699492633342743,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.48841196298599243,
      "step": 575
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3127.0,
      "completions/mean_length": 881.825927734375,
      "completions/mean_terminated_length": 605.2120971679688,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 5.382507288629737,
      "grad_norm": 0.14458364248275757,
      "learning_rate": 1e-06,
      "loss": -0.031,
      "num_tokens": 344511481.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.15030533075332642,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 576
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4036.0,
      "completions/mean_length": 910.7310791015625,
      "completions/mean_terminated_length": 636.6048583984375,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 5.391836734693878,
      "grad_norm": 0.171955406665802,
      "learning_rate": 1e-06,
      "loss": -0.0286,
      "num_tokens": 345123256.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.13508693873882294,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 577
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2465.0,
      "completions/mean_length": 869.3939819335938,
      "completions/mean_terminated_length": 600.1849975585938,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 5.401166180758017,
      "grad_norm": 0.13843972980976105,
      "learning_rate": 1e-06,
      "loss": -0.0194,
      "num_tokens": 345715233.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.1371905654668808,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807061672210693,
      "step": 578
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3303.0,
      "completions/mean_length": 992.16748046875,
      "completions/mean_terminated_length": 628.3765869140625,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 5.410495626822158,
      "grad_norm": 0.18873398005962372,
      "learning_rate": 1e-06,
      "loss": -0.0229,
      "num_tokens": 346313231.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.1828792542219162,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161845445632935,
      "step": 579
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3424.0,
      "completions/mean_length": 856.6808471679688,
      "completions/mean_terminated_length": 620.0359497070312,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 5.419825072886297,
      "grad_norm": 0.14334967732429504,
      "learning_rate": 1e-06,
      "loss": -0.0068,
      "num_tokens": 346916697.0,
      "reward": 0.660714328289032,
      "reward_std": 0.1626298427581787,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 580
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3971.0,
      "completions/mean_length": 867.802490234375,
      "completions/mean_terminated_length": 594.2264404296875,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 5.429154518950437,
      "grad_norm": 0.11689605563879013,
      "learning_rate": 1e-06,
      "loss": -0.0325,
      "num_tokens": 347493752.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.10968157649040222,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159480571747,
      "step": 581
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4056.0,
      "completions/mean_length": 933.2210083007812,
      "completions/mean_terminated_length": 644.2947998046875,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 5.438483965014577,
      "grad_norm": 0.12936632335186005,
      "learning_rate": 1e-06,
      "loss": -0.0147,
      "num_tokens": 348104246.0,
      "reward": 0.6640625,
      "reward_std": 0.1634860783815384,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 582
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3923.0,
      "completions/mean_length": 874.5904541015625,
      "completions/mean_terminated_length": 597.3539428710938,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 5.447813411078717,
      "grad_norm": 0.16096006333827972,
      "learning_rate": 1e-06,
      "loss": -0.0344,
      "num_tokens": 348679951.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.1748744547367096,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 583
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2761.0,
      "completions/mean_length": 848.8047485351562,
      "completions/mean_terminated_length": 590.593994140625,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 5.457142857142857,
      "grad_norm": 0.13853366672992706,
      "learning_rate": 1e-06,
      "loss": -0.0398,
      "num_tokens": 349254784.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.17081758379936218,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.48468026518821716,
      "step": 584
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2573.0,
      "completions/mean_length": 974.8248291015625,
      "completions/mean_terminated_length": 621.9962768554688,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 5.466472303206997,
      "grad_norm": 0.1280086636543274,
      "learning_rate": 1e-06,
      "loss": -0.0426,
      "num_tokens": 349852459.0,
      "reward": 0.598214328289032,
      "reward_std": 0.13921892642974854,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053290486335754,
      "step": 585
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3792.0,
      "completions/mean_length": 865.4520263671875,
      "completions/mean_terminated_length": 591.6767578125,
      "completions/min_length": 195.0,
      "completions/min_terminated_length": 195.0,
      "epoch": 5.475801749271137,
      "grad_norm": 0.15977877378463745,
      "learning_rate": 1e-06,
      "loss": -0.017,
      "num_tokens": 350428920.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.1852443516254425,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 586
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2997.0,
      "completions/mean_length": 929.4063110351562,
      "completions/mean_terminated_length": 652.713623046875,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 5.485131195335277,
      "grad_norm": 0.1367771029472351,
      "learning_rate": 1e-06,
      "loss": -0.0308,
      "num_tokens": 351065372.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.15942853689193726,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 587
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3689.0,
      "completions/mean_length": 1064.630615234375,
      "completions/mean_terminated_length": 640.3931274414062,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 5.494460641399417,
      "grad_norm": 0.14162762463092804,
      "learning_rate": 1e-06,
      "loss": -0.027,
      "num_tokens": 351666393.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.15916681289672852,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 588
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3185.0,
      "completions/mean_length": 873.7422485351562,
      "completions/mean_terminated_length": 609.1123046875,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 5.503790087463557,
      "grad_norm": 0.14134083688259125,
      "learning_rate": 1e-06,
      "loss": -0.0427,
      "num_tokens": 352256394.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.1574750393629074,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 589
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.060267857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1970.0,
      "completions/mean_length": 777.568115234375,
      "completions/mean_terminated_length": 564.7470703125,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 5.513119533527696,
      "grad_norm": 0.15470553934574127,
      "learning_rate": 1e-06,
      "loss": -0.0376,
      "num_tokens": 352815631.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.16101224720478058,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 590
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3057.0,
      "completions/mean_length": 985.7645263671875,
      "completions/mean_terminated_length": 612.5362548828125,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 5.522448979591837,
      "grad_norm": 0.15244746208190918,
      "learning_rate": 1e-06,
      "loss": -0.0393,
      "num_tokens": 353401740.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.17235924303531647,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 591
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2796.0,
      "completions/mean_length": 849.8058471679688,
      "completions/mean_terminated_length": 616.8253173828125,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 5.531778425655976,
      "grad_norm": 0.13224495947360992,
      "learning_rate": 1e-06,
      "loss": -0.044,
      "num_tokens": 354018638.0,
      "reward": 0.652901828289032,
      "reward_std": 0.16323533654212952,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631317377090454,
      "step": 592
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3486.0,
      "completions/mean_length": 911.5011596679688,
      "completions/mean_terminated_length": 582.0701904296875,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 5.541107871720117,
      "grad_norm": 0.14421240985393524,
      "learning_rate": 1e-06,
      "loss": -0.0531,
      "num_tokens": 354587599.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.1471051275730133,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 593
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3826.0,
      "completions/mean_length": 1075.540283203125,
      "completions/mean_terminated_length": 657.2045288085938,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 5.550437317784256,
      "grad_norm": 0.16855870187282562,
      "learning_rate": 1e-06,
      "loss": -0.083,
      "num_tokens": 355189395.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.20005299150943756,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914289474487305,
      "step": 594
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3282.0,
      "completions/mean_length": 1002.5625610351562,
      "completions/mean_terminated_length": 639.9900512695312,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 5.559766763848397,
      "grad_norm": 0.17673636972904205,
      "learning_rate": 1e-06,
      "loss": -0.0474,
      "num_tokens": 355809707.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.18821631371974945,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 595
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2931.0,
      "completions/mean_length": 989.51123046875,
      "completions/mean_terminated_length": 625.4089965820312,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 5.569096209912536,
      "grad_norm": 0.1684071272611618,
      "learning_rate": 1e-06,
      "loss": -0.0339,
      "num_tokens": 356387085.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.1985434591770172,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.4846802353858948,
      "step": 596
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2428.0,
      "completions/mean_length": 852.7142944335938,
      "completions/mean_terminated_length": 603.2307739257812,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 5.578425655976677,
      "grad_norm": 0.14479760825634003,
      "learning_rate": 1e-06,
      "loss": -0.0301,
      "num_tokens": 356966557.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.1407930701971054,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807061672210693,
      "step": 597
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2759.0,
      "completions/mean_length": 927.8638916015625,
      "completions/mean_terminated_length": 621.5202026367188,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 5.587755102040816,
      "grad_norm": 0.15837636590003967,
      "learning_rate": 1e-06,
      "loss": -0.0301,
      "num_tokens": 357558851.0,
      "reward": 0.598214328289032,
      "reward_std": 0.17728371918201447,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053290486335754,
      "step": 598
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3263.0,
      "completions/mean_length": 997.7422485351562,
      "completions/mean_terminated_length": 608.5137939453125,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 5.597084548104956,
      "grad_norm": 0.1677587926387787,
      "learning_rate": 1e-06,
      "loss": -0.0465,
      "num_tokens": 358134820.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.18231727182865143,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 599
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2665.0,
      "completions/mean_length": 1024.2623291015625,
      "completions/mean_terminated_length": 625.2849731445312,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 5.606413994169096,
      "grad_norm": 0.15459483861923218,
      "learning_rate": 1e-06,
      "loss": -0.0294,
      "num_tokens": 358717983.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.16773755848407745,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 600
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.052455357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2661.0,
      "completions/mean_length": 812.6585083007812,
      "completions/mean_terminated_length": 630.8952026367188,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 5.615743440233236,
      "grad_norm": 0.1608906388282776,
      "learning_rate": 1e-06,
      "loss": -0.0203,
      "num_tokens": 359342053.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.19813409447669983,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 601
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0636160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3789.0,
      "completions/mean_length": 817.8248291015625,
      "completions/mean_terminated_length": 595.112060546875,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 5.625072886297376,
      "grad_norm": 0.13706611096858978,
      "learning_rate": 1e-06,
      "loss": -0.0247,
      "num_tokens": 359920952.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.16265869140625,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 602
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3890.0,
      "completions/mean_length": 894.86279296875,
      "completions/mean_terminated_length": 615.1517333984375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 5.634402332361516,
      "grad_norm": 0.16253483295440674,
      "learning_rate": 1e-06,
      "loss": -0.0182,
      "num_tokens": 360522613.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.18742607533931732,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 603
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0558035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2333.0,
      "completions/mean_length": 820.6361694335938,
      "completions/mean_terminated_length": 627.0567626953125,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 5.643731778425656,
      "grad_norm": 0.14758019149303436,
      "learning_rate": 1e-06,
      "loss": -0.0292,
      "num_tokens": 361139543.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.1923847645521164,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 604
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3689.0,
      "completions/mean_length": 901.40185546875,
      "completions/mean_terminated_length": 609.56884765625,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 5.653061224489796,
      "grad_norm": 0.13753700256347656,
      "learning_rate": 1e-06,
      "loss": -0.0462,
      "num_tokens": 361729487.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.17171913385391235,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 605
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3492.0,
      "completions/mean_length": 891.83154296875,
      "completions/mean_terminated_length": 599.124267578125,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 5.662390670553936,
      "grad_norm": 0.17821872234344482,
      "learning_rate": 1e-06,
      "loss": -0.032,
      "num_tokens": 362308552.0,
      "reward": 0.660714328289032,
      "reward_std": 0.21707123517990112,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 606
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2894.0,
      "completions/mean_length": 888.2053833007812,
      "completions/mean_terminated_length": 586.6178588867188,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 5.671720116618076,
      "grad_norm": 0.14055386185646057,
      "learning_rate": 1e-06,
      "loss": -0.039,
      "num_tokens": 362875784.0,
      "reward": 0.6930803656578064,
      "reward_std": 0.14669284224510193,
      "rewards/verify_math_reward/mean": 0.6930803656578064,
      "rewards/verify_math_reward/std": 0.46147334575653076,
      "step": 607
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0535714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2822.0,
      "completions/mean_length": 765.9252319335938,
      "completions/mean_terminated_length": 577.430419921875,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 5.681049562682215,
      "grad_norm": 0.11932545900344849,
      "learning_rate": 1e-06,
      "loss": -0.0173,
      "num_tokens": 363459509.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.11712367087602615,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.4628615975379944,
      "step": 608
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3767.0,
      "completions/mean_length": 926.708740234375,
      "completions/mean_terminated_length": 628.7411499023438,
      "completions/min_length": 93.0,
      "completions/min_terminated_length": 93.0,
      "epoch": 5.690379008746356,
      "grad_norm": 0.15615519881248474,
      "learning_rate": 1e-06,
      "loss": -0.0428,
      "num_tokens": 364063832.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.18648220598697662,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 609
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.060267857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3095.0,
      "completions/mean_length": 774.9642944335938,
      "completions/mean_terminated_length": 561.9762573242188,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 5.699708454810495,
      "grad_norm": 0.1408005803823471,
      "learning_rate": 1e-06,
      "loss": -0.0312,
      "num_tokens": 364628504.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.149286150932312,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 610
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3978.0,
      "completions/mean_length": 851.0123291015625,
      "completions/mean_terminated_length": 613.9533081054688,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 5.709037900874636,
      "grad_norm": 0.13900849223136902,
      "learning_rate": 1e-06,
      "loss": -0.0322,
      "num_tokens": 365236555.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.16037030518054962,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 611
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3284.0,
      "completions/mean_length": 1006.2589721679688,
      "completions/mean_terminated_length": 613.7257690429688,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 5.718367346938775,
      "grad_norm": 0.16850095987319946,
      "learning_rate": 1e-06,
      "loss": -0.0764,
      "num_tokens": 365824115.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.18874019384384155,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 612
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3931.0,
      "completions/mean_length": 891.1272583007812,
      "completions/mean_terminated_length": 585.5281372070312,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 5.727696793002916,
      "grad_norm": 0.13965724408626556,
      "learning_rate": 1e-06,
      "loss": -0.0406,
      "num_tokens": 366392733.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.1321905255317688,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.47942501306533813,
      "step": 613
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2521.0,
      "completions/mean_length": 823.0614013671875,
      "completions/mean_terminated_length": 592.3524169921875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 5.737026239067055,
      "grad_norm": 0.14950597286224365,
      "learning_rate": 1e-06,
      "loss": -0.0333,
      "num_tokens": 366982268.0,
      "reward": 0.6484375,
      "reward_std": 0.16465751826763153,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 614
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2915.0,
      "completions/mean_length": 821.654052734375,
      "completions/mean_terminated_length": 595.0286865234375,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 5.746355685131196,
      "grad_norm": 0.14539076387882233,
      "learning_rate": 1e-06,
      "loss": -0.0274,
      "num_tokens": 367569398.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.17461413145065308,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.4884119927883148,
      "step": 615
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3374.0,
      "completions/mean_length": 858.5859985351562,
      "completions/mean_terminated_length": 588.4752197265625,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 5.755685131195335,
      "grad_norm": 0.13074429333209991,
      "learning_rate": 1e-06,
      "loss": -0.0385,
      "num_tokens": 368148899.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.1418115496635437,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 616
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2440.0,
      "completions/mean_length": 815.708740234375,
      "completions/mean_terminated_length": 588.671875,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 5.765014577259475,
      "grad_norm": 0.16770143806934357,
      "learning_rate": 1e-06,
      "loss": -0.0411,
      "num_tokens": 368728542.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.1802103966474533,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 617
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2782.0,
      "completions/mean_length": 987.958740234375,
      "completions/mean_terminated_length": 649.4591674804688,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 5.774344023323615,
      "grad_norm": 0.13639651238918304,
      "learning_rate": 1e-06,
      "loss": -0.0538,
      "num_tokens": 369337593.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.1692018061876297,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 618
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4059.0,
      "completions/mean_length": 927.35498046875,
      "completions/mean_terminated_length": 595.2527465820312,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 5.783673469387755,
      "grad_norm": 0.13305982947349548,
      "learning_rate": 1e-06,
      "loss": -0.0333,
      "num_tokens": 369909847.0,
      "reward": 0.640625,
      "reward_std": 0.12426057457923889,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 619
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3511.0,
      "completions/mean_length": 833.2511596679688,
      "completions/mean_terminated_length": 561.026611328125,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 5.793002915451895,
      "grad_norm": 0.14414291083812714,
      "learning_rate": 1e-06,
      "loss": -0.0495,
      "num_tokens": 370470696.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.15962466597557068,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 620
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2733.0,
      "completions/mean_length": 896.3136596679688,
      "completions/mean_terminated_length": 608.2639770507812,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 5.802332361516035,
      "grad_norm": 0.1310935616493225,
      "learning_rate": 1e-06,
      "loss": -0.0325,
      "num_tokens": 371057769.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.14684367179870605,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 621
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3498.0,
      "completions/mean_length": 882.8281860351562,
      "completions/mean_terminated_length": 627.3228759765625,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 5.811661807580175,
      "grad_norm": 0.13719427585601807,
      "learning_rate": 1e-06,
      "loss": -0.0353,
      "num_tokens": 371662655.0,
      "reward": 0.668526828289032,
      "reward_std": 0.16405907273292542,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056483745575,
      "step": 622
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3710.0,
      "completions/mean_length": 1014.232177734375,
      "completions/mean_terminated_length": 670.1141357421875,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 5.820991253644315,
      "grad_norm": 0.14407393336296082,
      "learning_rate": 1e-06,
      "loss": -0.037,
      "num_tokens": 372303167.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.1522217094898224,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644899368286,
      "step": 623
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3407.0,
      "completions/mean_length": 933.95654296875,
      "completions/mean_terminated_length": 645.0974731445312,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 5.830320699708455,
      "grad_norm": 0.14550761878490448,
      "learning_rate": 1e-06,
      "loss": -0.0548,
      "num_tokens": 372934568.0,
      "reward": 0.609375,
      "reward_std": 0.16999204456806183,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 624
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3052.0,
      "completions/mean_length": 844.0904541015625,
      "completions/mean_terminated_length": 585.5048217773438,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 5.839650145772595,
      "grad_norm": 0.14165332913398743,
      "learning_rate": 1e-06,
      "loss": -0.0429,
      "num_tokens": 373500169.0,
      "reward": 0.7087053656578064,
      "reward_std": 0.1527032107114792,
      "rewards/verify_math_reward/mean": 0.7087053656578064,
      "rewards/verify_math_reward/std": 0.45461273193359375,
      "step": 625
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0479910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3868.0,
      "completions/mean_length": 763.7120971679688,
      "completions/mean_terminated_length": 595.7303466796875,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 5.848979591836734,
      "grad_norm": 0.12370767444372177,
      "learning_rate": 1e-06,
      "loss": -0.0117,
      "num_tokens": 374088303.0,
      "reward": 0.6640625,
      "reward_std": 0.11896559596061707,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 626
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3027.0,
      "completions/mean_length": 820.0826416015625,
      "completions/mean_terminated_length": 576.5491943359375,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 5.858309037900875,
      "grad_norm": 0.17080122232437134,
      "learning_rate": 1e-06,
      "loss": -0.0177,
      "num_tokens": 374664105.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.1446651816368103,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 627
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3773.0,
      "completions/mean_length": 922.950927734375,
      "completions/mean_terminated_length": 603.30712890625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 5.867638483965014,
      "grad_norm": 0.16178250312805176,
      "learning_rate": 1e-06,
      "loss": -0.0351,
      "num_tokens": 375235501.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.16724829375743866,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 628
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4056.0,
      "completions/mean_length": 899.0167846679688,
      "completions/mean_terminated_length": 619.668701171875,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 5.876967930029155,
      "grad_norm": 0.14244891703128815,
      "learning_rate": 1e-06,
      "loss": -0.0533,
      "num_tokens": 375844228.0,
      "reward": 0.629464328289032,
      "reward_std": 0.1816418617963791,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 629
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3564.0,
      "completions/mean_length": 906.47998046875,
      "completions/mean_terminated_length": 602.3447875976562,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 5.886297376093294,
      "grad_norm": 0.14157941937446594,
      "learning_rate": 1e-06,
      "loss": -0.0483,
      "num_tokens": 376428978.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.165178582072258,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 630
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0658482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2815.0,
      "completions/mean_length": 818.3705444335938,
      "completions/mean_terminated_length": 587.3309326171875,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 5.895626822157435,
      "grad_norm": 0.14939922094345093,
      "learning_rate": 1e-06,
      "loss": -0.0346,
      "num_tokens": 377008262.0,
      "reward": 0.637276828289032,
      "reward_std": 0.15132339298725128,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 631
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3528.0,
      "completions/mean_length": 937.2980346679688,
      "completions/mean_terminated_length": 652.9379272460938,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 5.904956268221574,
      "grad_norm": 0.14546416699886322,
      "learning_rate": 1e-06,
      "loss": -0.0222,
      "num_tokens": 377641929.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.17341090738773346,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 632
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.052455357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2029.0,
      "completions/mean_length": 757.5145263671875,
      "completions/mean_terminated_length": 572.698486328125,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 5.914285714285715,
      "grad_norm": 0.14108310639858246,
      "learning_rate": 1e-06,
      "loss": -0.0389,
      "num_tokens": 378209158.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.13973930478096008,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 633
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2733.0,
      "completions/mean_length": 801.0960083007812,
      "completions/mean_terminated_length": 581.4357299804688,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 5.923615160349854,
      "grad_norm": 0.17102012038230896,
      "learning_rate": 1e-06,
      "loss": -0.0148,
      "num_tokens": 378789804.0,
      "reward": 0.6640625,
      "reward_std": 0.18152238428592682,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 634
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3288.0,
      "completions/mean_length": 983.6239013671875,
      "completions/mean_terminated_length": 618.8316650390625,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 5.932944606413994,
      "grad_norm": 0.16276925802230835,
      "learning_rate": 1e-06,
      "loss": -0.0524,
      "num_tokens": 379383507.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.19148434698581696,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 635
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2827.0,
      "completions/mean_length": 908.6529541015625,
      "completions/mean_terminated_length": 613.240234375,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 5.942274052478134,
      "grad_norm": 0.14920274913311005,
      "learning_rate": 1e-06,
      "loss": -0.0109,
      "num_tokens": 379973484.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.14635765552520752,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975653409957886,
      "step": 636
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3118.0,
      "completions/mean_length": 908.0469360351562,
      "completions/mean_terminated_length": 616.8209838867188,
      "completions/min_length": 94.0,
      "completions/min_terminated_length": 94.0,
      "epoch": 5.9516034985422746,
      "grad_norm": 0.1606222540140152,
      "learning_rate": 1e-06,
      "loss": -0.0182,
      "num_tokens": 380575622.0,
      "reward": 0.598214328289032,
      "reward_std": 0.16540497541427612,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053290486335754,
      "step": 637
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3884.0,
      "completions/mean_length": 978.2891235351562,
      "completions/mean_terminated_length": 664.2199096679688,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 5.960932944606414,
      "grad_norm": 0.13233332335948944,
      "learning_rate": 1e-06,
      "loss": -0.0298,
      "num_tokens": 381212033.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.15060026943683624,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 638
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3942.0,
      "completions/mean_length": 805.2656860351562,
      "completions/mean_terminated_length": 577.5059814453125,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 5.970262390670554,
      "grad_norm": 0.141335129737854,
      "learning_rate": 1e-06,
      "loss": -0.035,
      "num_tokens": 381787199.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.16029614210128784,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 639
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2849.0,
      "completions/mean_length": 804.2098388671875,
      "completions/mean_terminated_length": 533.8695678710938,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 5.979591836734694,
      "grad_norm": 0.14169426262378693,
      "learning_rate": 1e-06,
      "loss": -0.0223,
      "num_tokens": 382309483.0,
      "reward": 0.65625,
      "reward_std": 0.12125539779663086,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 640
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3972.0,
      "completions/mean_length": 1099.232177734375,
      "completions/mean_terminated_length": 617.885986328125,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 5.988921282798834,
      "grad_norm": 0.16632375121116638,
      "learning_rate": 1e-06,
      "loss": -0.0578,
      "num_tokens": 382886459.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.18400652706623077,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514806270599365,
      "step": 641
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.05965909090909094,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2588.0,
      "completions/mean_length": 834.4375,
      "completions/mean_terminated_length": 627.5105590820312,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 5.998250728862974,
      "grad_norm": 0.14725947380065918,
      "learning_rate": 1e-06,
      "loss": -0.0428,
      "num_tokens": 383457042.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.14534805715084076,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 642
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3828.0,
      "completions/mean_length": 875.0067138671875,
      "completions/mean_terminated_length": 618.8795166015625,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 6.0093294460641395,
      "grad_norm": 0.15770813822746277,
      "learning_rate": 1e-06,
      "loss": -0.0384,
      "num_tokens": 384075656.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.16390934586524963,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791128396987915,
      "step": 643
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2629.0,
      "completions/mean_length": 771.4564819335938,
      "completions/mean_terminated_length": 549.8202514648438,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 6.01865889212828,
      "grad_norm": 0.16204263269901276,
      "learning_rate": 1e-06,
      "loss": -0.0345,
      "num_tokens": 384615121.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.17325752973556519,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 644
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3206.0,
      "completions/mean_length": 888.7801513671875,
      "completions/mean_terminated_length": 604.3001708984375,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 6.0279883381924195,
      "grad_norm": 0.18138130009174347,
      "learning_rate": 1e-06,
      "loss": -0.0451,
      "num_tokens": 385203988.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.20534615218639374,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 645
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3978.0,
      "completions/mean_length": 933.7120971679688,
      "completions/mean_terminated_length": 627.9338989257812,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 6.03731778425656,
      "grad_norm": 0.15825672447681427,
      "learning_rate": 1e-06,
      "loss": -0.0275,
      "num_tokens": 385817842.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.15420952439308167,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 646
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3903.0,
      "completions/mean_length": 952.71435546875,
      "completions/mean_terminated_length": 698.673095703125,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 6.0466472303206995,
      "grad_norm": 0.139588862657547,
      "learning_rate": 1e-06,
      "loss": -0.0544,
      "num_tokens": 386481378.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.1636785864830017,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 647
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3614.0,
      "completions/mean_length": 919.12841796875,
      "completions/mean_terminated_length": 586.1640014648438,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 6.05597667638484,
      "grad_norm": 0.14342287182807922,
      "learning_rate": 1e-06,
      "loss": -0.0257,
      "num_tokens": 387044293.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.15349416434764862,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 648
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3892.0,
      "completions/mean_length": 959.8035888671875,
      "completions/mean_terminated_length": 626.82470703125,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 6.0653061224489795,
      "grad_norm": 0.14265646040439606,
      "learning_rate": 1e-06,
      "loss": -0.0264,
      "num_tokens": 387639741.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.14004167914390564,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 649
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3879.0,
      "completions/mean_length": 851.2813110351562,
      "completions/mean_terminated_length": 576.3051147460938,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 6.07463556851312,
      "grad_norm": 0.14192552864551544,
      "learning_rate": 1e-06,
      "loss": -0.0435,
      "num_tokens": 388207105.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.1554897278547287,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485536336898804,
      "step": 650
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.056919642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2246.0,
      "completions/mean_length": 759.1049194335938,
      "completions/mean_terminated_length": 557.7064819335938,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 6.0839650145772595,
      "grad_norm": 0.15565773844718933,
      "learning_rate": 1e-06,
      "loss": -0.0365,
      "num_tokens": 388769359.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.15338537096977234,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219157218933105,
      "step": 651
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2537.0,
      "completions/mean_length": 843.5045166015625,
      "completions/mean_terminated_length": 572.1354370117188,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 6.093294460641399,
      "grad_norm": 0.14776359498500824,
      "learning_rate": 1e-06,
      "loss": -0.0192,
      "num_tokens": 389330691.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.1338823139667511,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 652
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3314.0,
      "completions/mean_length": 912.7701416015625,
      "completions/mean_terminated_length": 600.688720703125,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 6.1026239067055394,
      "grad_norm": 0.15512509644031525,
      "learning_rate": 1e-06,
      "loss": -0.0472,
      "num_tokens": 389911645.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.15631134808063507,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 653
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3418.0,
      "completions/mean_length": 948.036865234375,
      "completions/mean_terminated_length": 630.920166015625,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 6.111953352769679,
      "grad_norm": 0.14921864867210388,
      "learning_rate": 1e-06,
      "loss": -0.062,
      "num_tokens": 390522430.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.18329153954982758,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 654
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3554.0,
      "completions/mean_length": 862.6663208007812,
      "completions/mean_terminated_length": 562.991455078125,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 6.121282798833819,
      "grad_norm": 0.3301600515842438,
      "learning_rate": 1e-06,
      "loss": -0.0307,
      "num_tokens": 391066227.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.15555500984191895,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 655
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2953.0,
      "completions/mean_length": 813.7756958007812,
      "completions/mean_terminated_length": 544.2210083007812,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 6.130612244897959,
      "grad_norm": 0.15756727755069733,
      "learning_rate": 1e-06,
      "loss": -0.0258,
      "num_tokens": 391594074.0,
      "reward": 0.723214328289032,
      "reward_std": 0.1493610143661499,
      "rewards/verify_math_reward/mean": 0.7232142686843872,
      "rewards/verify_math_reward/std": 0.44765952229499817,
      "step": 656
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3012.0,
      "completions/mean_length": 845.3281860351562,
      "completions/mean_terminated_length": 620.34130859375,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 6.139941690962099,
      "grad_norm": 0.1489861011505127,
      "learning_rate": 1e-06,
      "loss": -0.0558,
      "num_tokens": 392198920.0,
      "reward": 0.684151828289032,
      "reward_std": 0.16773684322834015,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 657
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4003.0,
      "completions/mean_length": 957.08154296875,
      "completions/mean_terminated_length": 584.8002319335938,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 6.149271137026239,
      "grad_norm": 0.16252826154232025,
      "learning_rate": 1e-06,
      "loss": -0.047,
      "num_tokens": 392761153.0,
      "reward": 0.629464328289032,
      "reward_std": 0.14079166948795319,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 658
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2455.0,
      "completions/mean_length": 796.4174194335938,
      "completions/mean_terminated_length": 576.4452514648438,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 6.158600583090379,
      "grad_norm": 0.13597793877124786,
      "learning_rate": 1e-06,
      "loss": -0.0395,
      "num_tokens": 393330439.0,
      "reward": 0.7243303656578064,
      "reward_std": 0.1406836062669754,
      "rewards/verify_math_reward/mean": 0.7243303656578064,
      "rewards/verify_math_reward/std": 0.4471006691455841,
      "step": 659
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3844.0,
      "completions/mean_length": 888.8013916015625,
      "completions/mean_terminated_length": 591.5487670898438,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 6.167930029154519,
      "grad_norm": 0.15661506354808807,
      "learning_rate": 1e-06,
      "loss": -0.0279,
      "num_tokens": 393902053.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.1735963523387909,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975656390190125,
      "step": 660
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3138.0,
      "completions/mean_length": 1075.360595703125,
      "completions/mean_terminated_length": 665.7173461914062,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 6.1772594752186585,
      "grad_norm": 0.1538289487361908,
      "learning_rate": 1e-06,
      "loss": -0.0465,
      "num_tokens": 394529584.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.17043782770633698,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 661
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0502232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3255.0,
      "completions/mean_length": 769.1730346679688,
      "completions/mean_terminated_length": 593.2537841796875,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 6.186588921282799,
      "grad_norm": 0.13282759487628937,
      "learning_rate": 1e-06,
      "loss": -0.0284,
      "num_tokens": 395117371.0,
      "reward": 0.6986607313156128,
      "reward_std": 0.14158585667610168,
      "rewards/verify_math_reward/mean": 0.6986607313156128,
      "rewards/verify_math_reward/std": 0.4590960443019867,
      "step": 662
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3696.0,
      "completions/mean_length": 878.6585083007812,
      "completions/mean_terminated_length": 584.7479248046875,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 6.1959183673469385,
      "grad_norm": 0.16318635642528534,
      "learning_rate": 1e-06,
      "loss": -0.044,
      "num_tokens": 395690865.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.17367054522037506,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 663
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2016.0,
      "completions/mean_length": 810.0279541015625,
      "completions/mean_terminated_length": 582.597900390625,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 6.205247813411079,
      "grad_norm": 0.1334102898836136,
      "learning_rate": 1e-06,
      "loss": -0.0216,
      "num_tokens": 396260210.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.130649596452713,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 664
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3396.0,
      "completions/mean_length": 929.9386596679688,
      "completions/mean_terminated_length": 653.29248046875,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 6.214577259475218,
      "grad_norm": 0.15028053522109985,
      "learning_rate": 1e-06,
      "loss": -0.0303,
      "num_tokens": 396897403.0,
      "reward": 0.551339328289032,
      "reward_std": 0.1418864130973816,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 665
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3659.0,
      "completions/mean_length": 831.505615234375,
      "completions/mean_terminated_length": 533.2874755859375,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 6.223906705539359,
      "grad_norm": 0.16576990485191345,
      "learning_rate": 1e-06,
      "loss": -0.0515,
      "num_tokens": 397413608.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.15488353371620178,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422144770622253,
      "step": 666
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3694.0,
      "completions/mean_length": 964.036865234375,
      "completions/mean_terminated_length": 648.5319213867188,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 6.233236151603498,
      "grad_norm": 0.14228703081607819,
      "learning_rate": 1e-06,
      "loss": -0.0499,
      "num_tokens": 398023633.0,
      "reward": 0.640625,
      "reward_std": 0.16134853661060333,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 667
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3856.0,
      "completions/mean_length": 1026.0201416015625,
      "completions/mean_terminated_length": 631.6397705078125,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 6.242565597667639,
      "grad_norm": 0.15054574608802795,
      "learning_rate": 1e-06,
      "loss": -0.0327,
      "num_tokens": 398620939.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.1465405970811844,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 668
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3837.0,
      "completions/mean_length": 977.7511596679688,
      "completions/mean_terminated_length": 633.8550415039062,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 6.251895043731778,
      "grad_norm": 0.1700843870639801,
      "learning_rate": 1e-06,
      "loss": -0.0235,
      "num_tokens": 399223812.0,
      "reward": 0.5859375,
      "reward_std": 0.1597301959991455,
      "rewards/verify_math_reward/mean": 0.5859375,
      "rewards/verify_math_reward/std": 0.4928344786167145,
      "step": 669
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3357.0,
      "completions/mean_length": 883.9832763671875,
      "completions/mean_terminated_length": 573.3966064453125,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 6.261224489795918,
      "grad_norm": 0.15604212880134583,
      "learning_rate": 1e-06,
      "loss": -0.0112,
      "num_tokens": 399782509.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.11960498988628387,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 670
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3368.0,
      "completions/mean_length": 888.8516235351562,
      "completions/mean_terminated_length": 587.3248291015625,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 6.270553935860058,
      "grad_norm": 0.15224426984786987,
      "learning_rate": 1e-06,
      "loss": -0.0577,
      "num_tokens": 400352568.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.14992554485797882,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 671
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2380.0,
      "completions/mean_length": 969.661865234375,
      "completions/mean_terminated_length": 624.8735961914062,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 6.279883381924198,
      "grad_norm": 0.14326012134552002,
      "learning_rate": 1e-06,
      "loss": -0.051,
      "num_tokens": 400951145.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.1510196030139923,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219157218933105,
      "step": 672
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3373.0,
      "completions/mean_length": 981.7578735351562,
      "completions/mean_terminated_length": 608.0487060546875,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 6.289212827988338,
      "grad_norm": 0.1547728329896927,
      "learning_rate": 1e-06,
      "loss": -0.0398,
      "num_tokens": 401537312.0,
      "reward": 0.625,
      "reward_std": 0.16330133378505707,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 673
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2486.0,
      "completions/mean_length": 867.833740234375,
      "completions/mean_terminated_length": 594.2603149414062,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 6.298542274052478,
      "grad_norm": 0.1469501405954361,
      "learning_rate": 1e-06,
      "loss": -0.0268,
      "num_tokens": 402115011.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.16927708685398102,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 674
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2878.0,
      "completions/mean_length": 939.9074096679688,
      "completions/mean_terminated_length": 613.4150390625,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 6.307871720116618,
      "grad_norm": 0.15620948374271393,
      "learning_rate": 1e-06,
      "loss": -0.0613,
      "num_tokens": 402688848.0,
      "reward": 0.660714328289032,
      "reward_std": 0.1834438145160675,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 675
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3498.0,
      "completions/mean_length": 946.30029296875,
      "completions/mean_terminated_length": 629.0086059570312,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 6.317201166180758,
      "grad_norm": 0.14087562263011932,
      "learning_rate": 1e-06,
      "loss": -0.0244,
      "num_tokens": 403287965.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.1571815013885498,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 676
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3362.0,
      "completions/mean_length": 950.5714721679688,
      "completions/mean_terminated_length": 590.646728515625,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 6.326530612244898,
      "grad_norm": 0.14991827309131622,
      "learning_rate": 1e-06,
      "loss": -0.055,
      "num_tokens": 403854205.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.16101223230361938,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 677
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2886.0,
      "completions/mean_length": 942.6875610351562,
      "completions/mean_terminated_length": 590.5806274414062,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 6.335860058309038,
      "grad_norm": 0.15213103592395782,
      "learning_rate": 1e-06,
      "loss": -0.0426,
      "num_tokens": 404433261.0,
      "reward": 0.6328125,
      "reward_std": 0.14977329969406128,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 678
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2945.0,
      "completions/mean_length": 893.1641235351562,
      "completions/mean_terminated_length": 587.7592163085938,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 6.345189504373177,
      "grad_norm": 0.15677736699581146,
      "learning_rate": 1e-06,
      "loss": -0.0666,
      "num_tokens": 405002544.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.17269553244113922,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 679
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3432.0,
      "completions/mean_length": 929.7857666015625,
      "completions/mean_terminated_length": 644.7493896484375,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 6.354518950437318,
      "grad_norm": 0.14856281876564026,
      "learning_rate": 1e-06,
      "loss": -0.0339,
      "num_tokens": 405618392.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.16991788148880005,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 680
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3868.0,
      "completions/mean_length": 1069.368408203125,
      "completions/mean_terminated_length": 663.2633056640625,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 6.363848396501457,
      "grad_norm": 0.153412863612175,
      "learning_rate": 1e-06,
      "loss": -0.0667,
      "num_tokens": 406233170.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.16529551148414612,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.4884119927883148,
      "step": 681
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3957.0,
      "completions/mean_length": 1008.0469360351562,
      "completions/mean_terminated_length": 615.7408447265625,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 6.373177842565598,
      "grad_norm": 0.163439080119133,
      "learning_rate": 1e-06,
      "loss": -0.0518,
      "num_tokens": 406818028.0,
      "reward": 0.613839328289032,
      "reward_std": 0.17754334211349487,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 682
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2578.0,
      "completions/mean_length": 835.3660888671875,
      "completions/mean_terminated_length": 576.0867309570312,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 6.382507288629737,
      "grad_norm": 0.14261586964130402,
      "learning_rate": 1e-06,
      "loss": -0.04,
      "num_tokens": 407388780.0,
      "reward": 0.715401828289032,
      "reward_std": 0.13482409715652466,
      "rewards/verify_math_reward/mean": 0.7154017686843872,
      "rewards/verify_math_reward/std": 0.4514748752117157,
      "step": 683
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2400.0,
      "completions/mean_length": 962.1138916015625,
      "completions/mean_terminated_length": 607.8484497070312,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 6.391836734693878,
      "grad_norm": 0.14774686098098755,
      "learning_rate": 1e-06,
      "loss": -0.0399,
      "num_tokens": 407968418.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.16604506969451904,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161848425865173,
      "step": 684
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2411.0,
      "completions/mean_length": 948.3147583007812,
      "completions/mean_terminated_length": 539.472900390625,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 6.401166180758017,
      "grad_norm": 0.1840965747833252,
      "learning_rate": 1e-06,
      "loss": -0.055,
      "num_tokens": 408487868.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.18036557734012604,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613664388656616,
      "step": 685
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3128.0,
      "completions/mean_length": 1002.8817138671875,
      "completions/mean_terminated_length": 565.5108642578125,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 6.410495626822158,
      "grad_norm": 0.15381786227226257,
      "learning_rate": 1e-06,
      "loss": -0.0622,
      "num_tokens": 409019298.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.14263640344142914,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 686
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2662.0,
      "completions/mean_length": 957.87060546875,
      "completions/mean_terminated_length": 628.9666748046875,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 6.419825072886297,
      "grad_norm": 0.15057718753814697,
      "learning_rate": 1e-06,
      "loss": -0.0517,
      "num_tokens": 409613822.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.16856171190738678,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 687
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2931.0,
      "completions/mean_length": 1012.1160888671875,
      "completions/mean_terminated_length": 637.7271728515625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 6.429154518950437,
      "grad_norm": 0.15426890552043915,
      "learning_rate": 1e-06,
      "loss": -0.0488,
      "num_tokens": 410210918.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.16337618231773376,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.4846802353858948,
      "step": 688
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3988.0,
      "completions/mean_length": 976.1038208007812,
      "completions/mean_terminated_length": 636.3131103515625,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 6.438483965014577,
      "grad_norm": 0.13959239423274994,
      "learning_rate": 1e-06,
      "loss": -0.037,
      "num_tokens": 410815923.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.16412687301635742,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 689
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3614.0,
      "completions/mean_length": 940.3504638671875,
      "completions/mean_terminated_length": 613.9039306640625,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 6.447813411078717,
      "grad_norm": 0.15441806614398956,
      "learning_rate": 1e-06,
      "loss": -0.0496,
      "num_tokens": 411399245.0,
      "reward": 0.6595982313156128,
      "reward_std": 0.18085232377052307,
      "rewards/verify_math_reward/mean": 0.6595982313156128,
      "rewards/verify_math_reward/std": 0.4741089344024658,
      "step": 690
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3517.0,
      "completions/mean_length": 1002.0123291015625,
      "completions/mean_terminated_length": 635.0599365234375,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 6.457142857142857,
      "grad_norm": 0.14723995327949524,
      "learning_rate": 1e-06,
      "loss": -0.0437,
      "num_tokens": 411996400.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.1742357611656189,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 691
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3049.0,
      "completions/mean_length": 844.5234985351562,
      "completions/mean_terminated_length": 598.6134033203125,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 6.466472303206997,
      "grad_norm": 0.14656583964824677,
      "learning_rate": 1e-06,
      "loss": -0.0125,
      "num_tokens": 412585333.0,
      "reward": 0.676339328289032,
      "reward_std": 0.1426345854997635,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335985660553,
      "step": 692
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3937.0,
      "completions/mean_length": 934.34716796875,
      "completions/mean_terminated_length": 576.94287109375,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 6.475801749271137,
      "grad_norm": 0.1560421884059906,
      "learning_rate": 1e-06,
      "loss": -0.0638,
      "num_tokens": 413132164.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.1557832509279251,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 693
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3249.0,
      "completions/mean_length": 845.8192138671875,
      "completions/mean_terminated_length": 570.3801879882812,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 6.485131195335277,
      "grad_norm": 0.14496955275535583,
      "learning_rate": 1e-06,
      "loss": -0.0152,
      "num_tokens": 413694866.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.13842660188674927,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 694
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3939.0,
      "completions/mean_length": 880.7600708007812,
      "completions/mean_terminated_length": 587.0414428710938,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 6.494460641399417,
      "grad_norm": 0.15243299305438995,
      "learning_rate": 1e-06,
      "loss": -0.0283,
      "num_tokens": 414264187.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.15800592303276062,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 695
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3640.0,
      "completions/mean_length": 981.44873046875,
      "completions/mean_terminated_length": 603.336669921875,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 6.503790087463557,
      "grad_norm": 0.14356254041194916,
      "learning_rate": 1e-06,
      "loss": -0.0238,
      "num_tokens": 414839845.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.13845908641815186,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 696
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3930.0,
      "completions/mean_length": 1060.068115234375,
      "completions/mean_terminated_length": 626.363525390625,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 6.513119533527696,
      "grad_norm": 0.1592957228422165,
      "learning_rate": 1e-06,
      "loss": -0.0907,
      "num_tokens": 415427586.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.1828383058309555,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 697
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3997.0,
      "completions/mean_length": 972.755615234375,
      "completions/mean_terminated_length": 611.0348510742188,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 6.522448979591837,
      "grad_norm": 0.13217735290527344,
      "learning_rate": 1e-06,
      "loss": -0.0333,
      "num_tokens": 416017999.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.1420711725950241,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 698
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0691964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3059.0,
      "completions/mean_length": 800.0535888671875,
      "completions/mean_terminated_length": 555.0311889648438,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 6.531778425655976,
      "grad_norm": 0.14009703695774078,
      "learning_rate": 1e-06,
      "loss": -0.0235,
      "num_tokens": 416570623.0,
      "reward": 0.7020089626312256,
      "reward_std": 0.12625475227832794,
      "rewards/verify_math_reward/mean": 0.7020089030265808,
      "rewards/verify_math_reward/std": 0.45763099193573,
      "step": 699
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2204.0,
      "completions/mean_length": 870.8850708007812,
      "completions/mean_terminated_length": 550.3521728515625,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 6.541107871720117,
      "grad_norm": 0.1271519809961319,
      "learning_rate": 1e-06,
      "loss": -0.0095,
      "num_tokens": 417099344.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.10058976709842682,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 700
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3150.0,
      "completions/mean_length": 974.5078735351562,
      "completions/mean_terminated_length": 625.9541015625,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 6.550437317784256,
      "grad_norm": 0.14003746211528778,
      "learning_rate": 1e-06,
      "loss": -0.0479,
      "num_tokens": 417688999.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.14102695882320404,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.47942501306533813,
      "step": 701
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4076.0,
      "completions/mean_length": 865.8560791015625,
      "completions/mean_terminated_length": 604.794921875,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 6.559766763848397,
      "grad_norm": 0.15287365019321442,
      "learning_rate": 1e-06,
      "loss": -0.0032,
      "num_tokens": 418274678.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.13602055609226227,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 702
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3689.0,
      "completions/mean_length": 1132.7578125,
      "completions/mean_terminated_length": 665.6834716796875,
      "completions/min_length": 97.0,
      "completions/min_terminated_length": 97.0,
      "epoch": 6.569096209912536,
      "grad_norm": 0.14975379407405853,
      "learning_rate": 1e-06,
      "loss": -0.0888,
      "num_tokens": 418866261.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.15274415910243988,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.49075525999069214,
      "step": 703
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3912.0,
      "completions/mean_length": 856.1395263671875,
      "completions/mean_terminated_length": 581.5750732421875,
      "completions/min_length": 185.0,
      "completions/min_terminated_length": 185.0,
      "epoch": 6.578425655976677,
      "grad_norm": 0.1418183445930481,
      "learning_rate": 1e-06,
      "loss": -0.0261,
      "num_tokens": 419432194.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.14699524641036987,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 704
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3742.0,
      "completions/mean_length": 979.10498046875,
      "completions/mean_terminated_length": 631.0645141601562,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 6.587755102040816,
      "grad_norm": 0.1584358662366867,
      "learning_rate": 1e-06,
      "loss": -0.0529,
      "num_tokens": 420021992.0,
      "reward": 0.6953125596046448,
      "reward_std": 0.17622952163219452,
      "rewards/verify_math_reward/mean": 0.6953125,
      "rewards/verify_math_reward/std": 0.4605320394039154,
      "step": 705
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4037.0,
      "completions/mean_length": 988.0614013671875,
      "completions/mean_terminated_length": 610.752197265625,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 6.597084548104956,
      "grad_norm": 0.15502335131168365,
      "learning_rate": 1e-06,
      "loss": -0.0331,
      "num_tokens": 420603039.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.15613025426864624,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111123085022,
      "step": 706
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3301.0,
      "completions/mean_length": 858.583740234375,
      "completions/mean_terminated_length": 575.7026977539062,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 6.606413994169096,
      "grad_norm": 0.14917011559009552,
      "learning_rate": 1e-06,
      "loss": -0.0362,
      "num_tokens": 421168810.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.13624556362628937,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.47737622261047363,
      "step": 707
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3962.0,
      "completions/mean_length": 882.0402221679688,
      "completions/mean_terminated_length": 584.1609497070312,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 6.615743440233236,
      "grad_norm": 0.15988053381443024,
      "learning_rate": 1e-06,
      "loss": -0.0371,
      "num_tokens": 421737950.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.17844446003437042,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 708
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3947.0,
      "completions/mean_length": 1074.094970703125,
      "completions/mean_terminated_length": 606.7899169921875,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 6.625072886297376,
      "grad_norm": 0.16448982059955597,
      "learning_rate": 1e-06,
      "loss": -0.0746,
      "num_tokens": 422298115.0,
      "reward": 0.645089328289032,
      "reward_std": 0.1699153631925583,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 709
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3173.0,
      "completions/mean_length": 1038.110595703125,
      "completions/mean_terminated_length": 583.3474731445312,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 6.634402332361516,
      "grad_norm": 0.1447528451681137,
      "learning_rate": 1e-06,
      "loss": -0.0829,
      "num_tokens": 422852918.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.16698938608169556,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 710
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3998.0,
      "completions/mean_length": 899.0881958007812,
      "completions/mean_terminated_length": 632.3567504882812,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 6.643731778425656,
      "grad_norm": 0.12234325706958771,
      "learning_rate": 1e-06,
      "loss": -0.0315,
      "num_tokens": 423459733.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.12125399708747864,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 711
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2934.0,
      "completions/mean_length": 1070.560302734375,
      "completions/mean_terminated_length": 575.48828125,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 6.653061224489796,
      "grad_norm": 0.15985053777694702,
      "learning_rate": 1e-06,
      "loss": -0.0578,
      "num_tokens": 423999227.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.16266122460365295,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 712
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3614.0,
      "completions/mean_length": 907.1574096679688,
      "completions/mean_terminated_length": 577.277099609375,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 6.662390670553936,
      "grad_norm": 0.14064276218414307,
      "learning_rate": 1e-06,
      "loss": -0.0373,
      "num_tokens": 424558992.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.11727311462163925,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 713
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3741.0,
      "completions/mean_length": 990.9085083007812,
      "completions/mean_terminated_length": 605.20703125,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 6.671720116618076,
      "grad_norm": 0.13120871782302856,
      "learning_rate": 1e-06,
      "loss": -0.0305,
      "num_tokens": 425140454.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.12302273511886597,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 714
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2889.0,
      "completions/mean_length": 1005.5324096679688,
      "completions/mean_terminated_length": 595.2933349609375,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 6.681049562682215,
      "grad_norm": 0.14877988398075104,
      "learning_rate": 1e-06,
      "loss": -0.0609,
      "num_tokens": 425702067.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.1471467763185501,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900800228119,
      "step": 715
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3806.0,
      "completions/mean_length": 1020.2545166015625,
      "completions/mean_terminated_length": 642.5313110351562,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 6.690379008746356,
      "grad_norm": 0.1527351588010788,
      "learning_rate": 1e-06,
      "loss": -0.0566,
      "num_tokens": 426305559.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.17446216940879822,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.48291724920272827,
      "step": 716
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3579.0,
      "completions/mean_length": 1064.0592041015625,
      "completions/mean_terminated_length": 626.4993286132812,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 6.699708454810495,
      "grad_norm": 0.12764711678028107,
      "learning_rate": 1e-06,
      "loss": -0.0524,
      "num_tokens": 426885116.0,
      "reward": 0.609375,
      "reward_std": 0.12692692875862122,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 717
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3393.0,
      "completions/mean_length": 908.0770263671875,
      "completions/mean_terminated_length": 608.3577880859375,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 6.709037900874636,
      "grad_norm": 0.15886114537715912,
      "learning_rate": 1e-06,
      "loss": -0.0312,
      "num_tokens": 427467681.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.1766424924135208,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 718
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2535.0,
      "completions/mean_length": 946.1339721679688,
      "completions/mean_terminated_length": 590.0620727539062,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 6.718367346938775,
      "grad_norm": 0.17554457485675812,
      "learning_rate": 1e-06,
      "loss": -0.0394,
      "num_tokens": 428041561.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.17559263110160828,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 719
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3210.0,
      "completions/mean_length": 967.8438110351562,
      "completions/mean_terminated_length": 618.5458984375,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 6.727696793002916,
      "grad_norm": 0.1546453833580017,
      "learning_rate": 1e-06,
      "loss": -0.0399,
      "num_tokens": 428629029.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.1616523265838623,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 720
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3660.0,
      "completions/mean_length": 926.0402221679688,
      "completions/mean_terminated_length": 623.7702026367188,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 6.737026239067055,
      "grad_norm": 0.14390479028224945,
      "learning_rate": 1e-06,
      "loss": -0.0167,
      "num_tokens": 429220193.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.1571369171142578,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 721
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2530.0,
      "completions/mean_length": 869.489990234375,
      "completions/mean_terminated_length": 540.0922241210938,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 6.746355685131196,
      "grad_norm": 0.15528041124343872,
      "learning_rate": 1e-06,
      "loss": -0.048,
      "num_tokens": 429762064.0,
      "reward": 0.6640625,
      "reward_std": 0.15924306213855743,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 722
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2770.0,
      "completions/mean_length": 997.4542846679688,
      "completions/mean_terminated_length": 581.7000122070312,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 6.755685131195335,
      "grad_norm": 0.18460367619991302,
      "learning_rate": 1e-06,
      "loss": -0.0649,
      "num_tokens": 430309543.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.1305394172668457,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 723
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3328.0,
      "completions/mean_length": 829.0089721679688,
      "completions/mean_terminated_length": 577.7019653320312,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 6.765014577259475,
      "grad_norm": 0.12850549817085266,
      "learning_rate": 1e-06,
      "loss": -0.0466,
      "num_tokens": 430871223.0,
      "reward": 0.707589328289032,
      "reward_std": 0.14263710379600525,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 724
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3886.0,
      "completions/mean_length": 1013.01123046875,
      "completions/mean_terminated_length": 643.052490234375,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 6.774344023323615,
      "grad_norm": 0.14496265351772308,
      "learning_rate": 1e-06,
      "loss": -0.0438,
      "num_tokens": 431471313.0,
      "reward": 0.609375,
      "reward_std": 0.16386516392230988,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 725
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3083.0,
      "completions/mean_length": 942.8683471679688,
      "completions/mean_terminated_length": 612.39208984375,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 6.783673469387755,
      "grad_norm": 0.16144829988479614,
      "learning_rate": 1e-06,
      "loss": -0.0452,
      "num_tokens": 432055411.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.1521814614534378,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 726
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2320.0,
      "completions/mean_length": 1024.888427734375,
      "completions/mean_terminated_length": 630.3626708984375,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 6.793002915451895,
      "grad_norm": 0.16446854174137115,
      "learning_rate": 1e-06,
      "loss": -0.0607,
      "num_tokens": 432646319.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.18010301887989044,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 727
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3900.0,
      "completions/mean_length": 1074.12841796875,
      "completions/mean_terminated_length": 629.1664428710938,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 6.802332361516035,
      "grad_norm": 0.1925697773694992,
      "learning_rate": 1e-06,
      "loss": -0.0472,
      "num_tokens": 433221706.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.18829300999641418,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263837933540344,
      "step": 728
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3842.0,
      "completions/mean_length": 990.4285888671875,
      "completions/mean_terminated_length": 617.760009765625,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 6.811661807580175,
      "grad_norm": 0.1481935977935791,
      "learning_rate": 1e-06,
      "loss": -0.0512,
      "num_tokens": 433797290.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.16597020626068115,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.4664256274700165,
      "step": 729
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1272321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3786.0,
      "completions/mean_length": 1064.4654541015625,
      "completions/mean_terminated_length": 622.5281372070312,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 6.820991253644315,
      "grad_norm": 0.1566762775182724,
      "learning_rate": 1e-06,
      "loss": -0.0441,
      "num_tokens": 434360643.0,
      "reward": 0.606026828289032,
      "reward_std": 0.17442122101783752,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890191316604614,
      "step": 730
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4006.0,
      "completions/mean_length": 1022.6785888671875,
      "completions/mean_terminated_length": 623.49560546875,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 6.830320699708455,
      "grad_norm": 0.1799861043691635,
      "learning_rate": 1e-06,
      "loss": -0.0309,
      "num_tokens": 434943875.0,
      "reward": 0.629464328289032,
      "reward_std": 0.16037283837795258,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 731
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3300.0,
      "completions/mean_length": 995.41748046875,
      "completions/mean_terminated_length": 640.6243896484375,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 6.839650145772595,
      "grad_norm": 0.15543977916240692,
      "learning_rate": 1e-06,
      "loss": -0.0294,
      "num_tokens": 435549521.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.16244256496429443,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 732
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3254.0,
      "completions/mean_length": 913.9263916015625,
      "completions/mean_terminated_length": 601.9583740234375,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 6.848979591836734,
      "grad_norm": 0.14535638689994812,
      "learning_rate": 1e-06,
      "loss": -0.057,
      "num_tokens": 436141807.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.1573990434408188,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975656390190125,
      "step": 733
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2443.0,
      "completions/mean_length": 1001.9732666015625,
      "completions/mean_terminated_length": 582.377685546875,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 6.858309037900875,
      "grad_norm": 0.1419903188943863,
      "learning_rate": 1e-06,
      "loss": -0.0394,
      "num_tokens": 436690015.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.14409995079040527,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 734
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1484375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3257.0,
      "completions/mean_length": 1119.2410888671875,
      "completions/mean_terminated_length": 600.3565063476562,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 6.867638483965014,
      "grad_norm": 0.15156620740890503,
      "learning_rate": 1e-06,
      "loss": -0.0817,
      "num_tokens": 437247871.0,
      "reward": 0.629464328289032,
      "reward_std": 0.15469737350940704,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 735
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2847.0,
      "completions/mean_length": 913.8527221679688,
      "completions/mean_terminated_length": 614.6764526367188,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 6.876967930029155,
      "grad_norm": 0.148374542593956,
      "learning_rate": 1e-06,
      "loss": -0.0378,
      "num_tokens": 437851427.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.14635653793811798,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 736
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0636160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3818.0,
      "completions/mean_length": 819.4017944335938,
      "completions/mean_terminated_length": 596.7962036132812,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 6.886297376093294,
      "grad_norm": 0.14753498136997223,
      "learning_rate": 1e-06,
      "loss": -0.0375,
      "num_tokens": 438451155.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.1374947875738144,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219160199165344,
      "step": 737
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3703.0,
      "completions/mean_length": 1006.2266235351562,
      "completions/mean_terminated_length": 648.38232421875,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 6.895626822157435,
      "grad_norm": 0.14850552380084991,
      "learning_rate": 1e-06,
      "loss": -0.0263,
      "num_tokens": 439065878.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.1515752673149109,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 738
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3984.0,
      "completions/mean_length": 942.4163208007812,
      "completions/mean_terminated_length": 585.9241943359375,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 6.904956268221574,
      "grad_norm": 0.15253846347332,
      "learning_rate": 1e-06,
      "loss": -0.028,
      "num_tokens": 439618731.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.14977288246154785,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219157218933105,
      "step": 739
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3940.0,
      "completions/mean_length": 1075.78466796875,
      "completions/mean_terminated_length": 648.7222900390625,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 6.914285714285715,
      "grad_norm": 0.13778938353061676,
      "learning_rate": 1e-06,
      "loss": -0.0507,
      "num_tokens": 440222378.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.1310618817806244,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 740
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3878.0,
      "completions/mean_length": 904.69091796875,
      "completions/mean_terminated_length": 583.2075805664062,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 6.923615160349854,
      "grad_norm": 0.16547341644763947,
      "learning_rate": 1e-06,
      "loss": -0.0479,
      "num_tokens": 440777597.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.1654791235923767,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.4664256274700165,
      "step": 741
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3951.0,
      "completions/mean_length": 931.1998291015625,
      "completions/mean_terminated_length": 608.10205078125,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 6.932944606413994,
      "grad_norm": 0.1453477442264557,
      "learning_rate": 1e-06,
      "loss": -0.0428,
      "num_tokens": 441356488.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.14203867316246033,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 742
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2650.0,
      "completions/mean_length": 905.6473388671875,
      "completions/mean_terminated_length": 579.94091796875,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 6.942274052478134,
      "grad_norm": 0.13660424947738647,
      "learning_rate": 1e-06,
      "loss": -0.0392,
      "num_tokens": 441922948.0,
      "reward": 0.640625,
      "reward_std": 0.14992626011371613,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 743
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3565.0,
      "completions/mean_length": 1148.8125,
      "completions/mean_terminated_length": 666.5454711914062,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 6.9516034985422746,
      "grad_norm": 0.15369150042533875,
      "learning_rate": 1e-06,
      "loss": -0.0847,
      "num_tokens": 442522492.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.17758752405643463,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924395978450775,
      "step": 744
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3203.0,
      "completions/mean_length": 980.9855346679688,
      "completions/mean_terminated_length": 620.2178955078125,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 6.960932944606414,
      "grad_norm": 0.13597574830055237,
      "learning_rate": 1e-06,
      "loss": -0.0526,
      "num_tokens": 443111087.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.13699373602867126,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140389680862427,
      "step": 745
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2414.0,
      "completions/mean_length": 947.6395263671875,
      "completions/mean_terminated_length": 604.748779296875,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 6.970262390670554,
      "grad_norm": 0.15619629621505737,
      "learning_rate": 1e-06,
      "loss": -0.055,
      "num_tokens": 443690644.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.14707191288471222,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 746
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2988.0,
      "completions/mean_length": 975.1797485351562,
      "completions/mean_terminated_length": 587.5244750976562,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 6.979591836734694,
      "grad_norm": 0.17877110838890076,
      "learning_rate": 1e-06,
      "loss": -0.0382,
      "num_tokens": 444245413.0,
      "reward": 0.637276828289032,
      "reward_std": 0.15500116348266602,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 747
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3919.0,
      "completions/mean_length": 935.318115234375,
      "completions/mean_terminated_length": 599.7395629882812,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 6.988921282798834,
      "grad_norm": 0.16470494866371155,
      "learning_rate": 1e-06,
      "loss": -0.0359,
      "num_tokens": 444811578.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.15000112354755402,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 748
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.13068181818181823,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2026.0,
      "completions/mean_length": 1023.7614135742188,
      "completions/mean_terminated_length": 561.9215698242188,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 6.998250728862974,
      "grad_norm": 0.1541009545326233,
      "learning_rate": 1e-06,
      "loss": -0.0582,
      "num_tokens": 445353419.0,
      "reward": 0.606026828289032,
      "reward_std": 0.1420711725950241,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 749
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3964.0,
      "completions/mean_length": 1002.380615234375,
      "completions/mean_terminated_length": 596.147705078125,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 7.0093294460641395,
      "grad_norm": 0.17025308310985565,
      "learning_rate": 1e-06,
      "loss": -0.0418,
      "num_tokens": 445921456.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.16296431422233582,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 750
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3364.0,
      "completions/mean_length": 930.5301513671875,
      "completions/mean_terminated_length": 568.3121948242188,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 7.01865889212828,
      "grad_norm": 0.15315227210521698,
      "learning_rate": 1e-06,
      "loss": -0.073,
      "num_tokens": 446469563.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.16258524358272552,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 751
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3210.0,
      "completions/mean_length": 967.51904296875,
      "completions/mean_terminated_length": 605.1917724609375,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 7.0279883381924195,
      "grad_norm": 0.14817029237747192,
      "learning_rate": 1e-06,
      "loss": -0.0538,
      "num_tokens": 447039844.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.1579635739326477,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 752
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3686.0,
      "completions/mean_length": 1061.4320068359375,
      "completions/mean_terminated_length": 632.3401489257812,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 7.03731778425656,
      "grad_norm": 0.12313847243785858,
      "learning_rate": 1e-06,
      "loss": -0.0621,
      "num_tokens": 447621887.0,
      "reward": 0.652901828289032,
      "reward_std": 0.124261274933815,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631317377090454,
      "step": 753
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3793.0,
      "completions/mean_length": 982.216552734375,
      "completions/mean_terminated_length": 599.822021484375,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 7.0466472303206995,
      "grad_norm": 0.16664567589759827,
      "learning_rate": 1e-06,
      "loss": -0.0284,
      "num_tokens": 448187449.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.13154971599578857,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 754
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3134.0,
      "completions/mean_length": 973.4129638671875,
      "completions/mean_terminated_length": 633.3292236328125,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 7.05597667638484,
      "grad_norm": 0.15151433646678925,
      "learning_rate": 1e-06,
      "loss": -0.0368,
      "num_tokens": 448799859.0,
      "reward": 0.6484375,
      "reward_std": 0.15251775085926056,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 755
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3613.0,
      "completions/mean_length": 969.2623291015625,
      "completions/mean_terminated_length": 637.2876586914062,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 7.0653061224489795,
      "grad_norm": 0.16572339832782745,
      "learning_rate": 1e-06,
      "loss": -0.0277,
      "num_tokens": 449405134.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.1702873855829239,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 756
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3684.0,
      "completions/mean_length": 921.01904296875,
      "completions/mean_terminated_length": 562.1080932617188,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 7.07463556851312,
      "grad_norm": 0.1490185409784317,
      "learning_rate": 1e-06,
      "loss": -0.0276,
      "num_tokens": 449944871.0,
      "reward": 0.6640625,
      "reward_std": 0.12549659609794617,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 757
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3951.0,
      "completions/mean_length": 989.7902221679688,
      "completions/mean_terminated_length": 599.5628051757812,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 7.0839650145772595,
      "grad_norm": 0.17254501581192017,
      "learning_rate": 1e-06,
      "loss": -0.0489,
      "num_tokens": 450510115.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.16882342100143433,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 758
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2861.0,
      "completions/mean_length": 924.8203735351562,
      "completions/mean_terminated_length": 592.4525146484375,
      "completions/min_length": 97.0,
      "completions/min_terminated_length": 97.0,
      "epoch": 7.093294460641399,
      "grad_norm": 0.15800072252750397,
      "learning_rate": 1e-06,
      "loss": -0.0347,
      "num_tokens": 451082794.0,
      "reward": 0.637276828289032,
      "reward_std": 0.15526078641414642,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 759
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3650.0,
      "completions/mean_length": 909.0703735351562,
      "completions/mean_terminated_length": 617.9379272460938,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 7.1026239067055394,
      "grad_norm": 0.14929740130901337,
      "learning_rate": 1e-06,
      "loss": -0.0263,
      "num_tokens": 451682041.0,
      "reward": 0.668526828289032,
      "reward_std": 0.15680059790611267,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056483745575,
      "step": 760
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3891.0,
      "completions/mean_length": 1049.28466796875,
      "completions/mean_terminated_length": 582.6705322265625,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 7.111953352769679,
      "grad_norm": 0.13264919817447662,
      "learning_rate": 1e-06,
      "loss": -0.0653,
      "num_tokens": 452217504.0,
      "reward": 0.660714328289032,
      "reward_std": 0.11817465722560883,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 761
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3851.0,
      "completions/mean_length": 1064.227783203125,
      "completions/mean_terminated_length": 648.70556640625,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 7.121282798833819,
      "grad_norm": 0.13209392130374908,
      "learning_rate": 1e-06,
      "loss": -0.0529,
      "num_tokens": 452819028.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.13383881747722626,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 762
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3683.0,
      "completions/mean_length": 948.7813110351562,
      "completions/mean_terminated_length": 631.7395629882812,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 7.130612244897959,
      "grad_norm": 0.16158021986484528,
      "learning_rate": 1e-06,
      "loss": -0.0274,
      "num_tokens": 453428016.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.18678276240825653,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 763
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2822.0,
      "completions/mean_length": 890.9029541015625,
      "completions/mean_terminated_length": 619.2845458984375,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 7.139941690962099,
      "grad_norm": 0.15226466953754425,
      "learning_rate": 1e-06,
      "loss": -0.028,
      "num_tokens": 454019745.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.1516169160604477,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.4600566029548645,
      "step": 764
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3380.0,
      "completions/mean_length": 1041.1239013671875,
      "completions/mean_terminated_length": 635.609375,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 7.149271137026239,
      "grad_norm": 0.13410533964633942,
      "learning_rate": 1e-06,
      "loss": -0.0143,
      "num_tokens": 454621984.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.14710794389247894,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807061672210693,
      "step": 765
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3638.0,
      "completions/mean_length": 989.8973388671875,
      "completions/mean_terminated_length": 582.0252685546875,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 7.158600583090379,
      "grad_norm": 0.26457151770591736,
      "learning_rate": 1e-06,
      "loss": -0.0207,
      "num_tokens": 455165900.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.10431172698736191,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 766
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2437.0,
      "completions/mean_length": 978.0636596679688,
      "completions/mean_terminated_length": 629.9069213867188,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 7.167930029154519,
      "grad_norm": 0.14636722207069397,
      "learning_rate": 1e-06,
      "loss": -0.0351,
      "num_tokens": 455766989.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.15860366821289062,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.4884119927883148,
      "step": 767
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3490.0,
      "completions/mean_length": 975.2277221679688,
      "completions/mean_terminated_length": 583.1708374023438,
      "completions/min_length": 181.0,
      "completions/min_terminated_length": 181.0,
      "epoch": 7.1772594752186585,
      "grad_norm": 0.15124312043190002,
      "learning_rate": 1e-06,
      "loss": -0.0414,
      "num_tokens": 456317273.0,
      "reward": 0.676339328289032,
      "reward_std": 0.14913208782672882,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335985660553,
      "step": 768
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2608.0,
      "completions/mean_length": 921.6317138671875,
      "completions/mean_terminated_length": 618.9413452148438,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 7.186588921282799,
      "grad_norm": 0.15408408641815186,
      "learning_rate": 1e-06,
      "loss": -0.0599,
      "num_tokens": 456911607.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.1886628121137619,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 769
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2836.0,
      "completions/mean_length": 1032.5279541015625,
      "completions/mean_terminated_length": 630.2537841796875,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 7.1959183673469385,
      "grad_norm": 0.1400507092475891,
      "learning_rate": 1e-06,
      "loss": -0.0645,
      "num_tokens": 457503288.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.13929423689842224,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 770
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3766.0,
      "completions/mean_length": 1033.84716796875,
      "completions/mean_terminated_length": 653.4793090820312,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 7.205247813411079,
      "grad_norm": 0.14792729914188385,
      "learning_rate": 1e-06,
      "loss": -0.0609,
      "num_tokens": 458109543.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.13737602531909943,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 771
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4023.0,
      "completions/mean_length": 1061.501220703125,
      "completions/mean_terminated_length": 628.0012817382812,
      "completions/min_length": 98.0,
      "completions/min_terminated_length": 98.0,
      "epoch": 7.214577259475218,
      "grad_norm": 0.15368112921714783,
      "learning_rate": 1e-06,
      "loss": -0.0489,
      "num_tokens": 458695288.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.1612711399793625,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 772
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2485.0,
      "completions/mean_length": 947.9676513671875,
      "completions/mean_terminated_length": 561.3671875,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 7.223906705539359,
      "grad_norm": 0.17014342546463013,
      "learning_rate": 1e-06,
      "loss": -0.0722,
      "num_tokens": 459238139.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.15965421497821808,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.4876568913459778,
      "step": 773
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3735.0,
      "completions/mean_length": 879.8114013671875,
      "completions/mean_terminated_length": 594.535888671875,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 7.233236151603498,
      "grad_norm": 0.16187119483947754,
      "learning_rate": 1e-06,
      "loss": -0.0145,
      "num_tokens": 459819258.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.16394074261188507,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 774
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3808.0,
      "completions/mean_length": 1040.4710693359375,
      "completions/mean_terminated_length": 673.8074951171875,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 7.242565597667639,
      "grad_norm": 0.14009040594100952,
      "learning_rate": 1e-06,
      "loss": -0.0546,
      "num_tokens": 460449592.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.16818967461585999,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 775
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3036.0,
      "completions/mean_length": 983.7344360351562,
      "completions/mean_terminated_length": 636.2109375,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 7.251895043731778,
      "grad_norm": 0.15002386271953583,
      "learning_rate": 1e-06,
      "loss": -0.0631,
      "num_tokens": 461048362.0,
      "reward": 0.65625,
      "reward_std": 0.1685623973608017,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 776
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3524.0,
      "completions/mean_length": 930.8270263671875,
      "completions/mean_terminated_length": 599.0887451171875,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 7.261224489795918,
      "grad_norm": 0.1424115151166916,
      "learning_rate": 1e-06,
      "loss": -0.0463,
      "num_tokens": 461616487.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.1451198309659958,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422144770622253,
      "step": 777
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3574.0,
      "completions/mean_length": 919.7489013671875,
      "completions/mean_terminated_length": 646.3988037109375,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 7.270553935860058,
      "grad_norm": 0.14443738758563995,
      "learning_rate": 1e-06,
      "loss": -0.0261,
      "num_tokens": 462235238.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.15488353371620178,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 778
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1573660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3732.0,
      "completions/mean_length": 1221.9866943359375,
      "completions/mean_terminated_length": 685.2503662109375,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 7.279883381924198,
      "grad_norm": 0.18153712153434753,
      "learning_rate": 1e-06,
      "loss": -0.0503,
      "num_tokens": 462845138.0,
      "reward": 0.546875,
      "reward_std": 0.20715807378292084,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 779
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2835.0,
      "completions/mean_length": 972.5569458007812,
      "completions/mean_terminated_length": 623.7853393554688,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 7.289212827988338,
      "grad_norm": 0.14053218066692352,
      "learning_rate": 1e-06,
      "loss": -0.0342,
      "num_tokens": 463436133.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.139630526304245,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 780
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3962.0,
      "completions/mean_length": 902.7254638671875,
      "completions/mean_terminated_length": 606.763427734375,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 7.298542274052478,
      "grad_norm": 0.14001287519931793,
      "learning_rate": 1e-06,
      "loss": -0.0687,
      "num_tokens": 464015983.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.1677689254283905,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 781
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4016.0,
      "completions/mean_length": 1044.6451416015625,
      "completions/mean_terminated_length": 652.6574096679688,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 7.307871720116618,
      "grad_norm": 0.16297949850559235,
      "learning_rate": 1e-06,
      "loss": -0.0346,
      "num_tokens": 464627601.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.17611047625541687,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 782
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2878.0,
      "completions/mean_length": 986.1417846679688,
      "completions/mean_terminated_length": 621.6446533203125,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 7.317201166180758,
      "grad_norm": 0.14394626021385193,
      "learning_rate": 1e-06,
      "loss": -0.0294,
      "num_tokens": 465211504.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.15000224113464355,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 783
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3153.0,
      "completions/mean_length": 921.0301513671875,
      "completions/mean_terminated_length": 596.8942260742188,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 7.326530612244898,
      "grad_norm": 0.15465307235717773,
      "learning_rate": 1e-06,
      "loss": -0.0799,
      "num_tokens": 465786723.0,
      "reward": 0.6975446939468384,
      "reward_std": 0.17600058019161224,
      "rewards/verify_math_reward/mean": 0.6975446343421936,
      "rewards/verify_math_reward/std": 0.45957788825035095,
      "step": 784
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3377.0,
      "completions/mean_length": 900.732177734375,
      "completions/mean_terminated_length": 613.0802612304688,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 7.335860058309038,
      "grad_norm": 0.13416793942451477,
      "learning_rate": 1e-06,
      "loss": -0.0431,
      "num_tokens": 466372315.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.1421799510717392,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 785
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3611.0,
      "completions/mean_length": 924.8359985351562,
      "completions/mean_terminated_length": 622.4511108398438,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 7.345189504373177,
      "grad_norm": 0.14363907277584076,
      "learning_rate": 1e-06,
      "loss": -0.0271,
      "num_tokens": 466971464.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.14354610443115234,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 786
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3771.0,
      "completions/mean_length": 1063.4296875,
      "completions/mean_terminated_length": 634.620361328125,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 7.354518950437318,
      "grad_norm": 0.15622581541538239,
      "learning_rate": 1e-06,
      "loss": -0.0362,
      "num_tokens": 467549897.0,
      "reward": 0.652901828289032,
      "reward_std": 0.13921935856342316,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631317377090454,
      "step": 787
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4048.0,
      "completions/mean_length": 977.0938110351562,
      "completions/mean_terminated_length": 658.681396484375,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 7.363848396501457,
      "grad_norm": 0.15423189103603363,
      "learning_rate": 1e-06,
      "loss": -0.0425,
      "num_tokens": 468180973.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.15936140716075897,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 788
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3122.0,
      "completions/mean_length": 928.0714721679688,
      "completions/mean_terminated_length": 596.0443725585938,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 7.373177842565598,
      "grad_norm": 0.14016598463058472,
      "learning_rate": 1e-06,
      "loss": -0.0115,
      "num_tokens": 468750205.0,
      "reward": 0.7064732313156128,
      "reward_std": 0.1352359503507614,
      "rewards/verify_math_reward/mean": 0.7064732313156128,
      "rewards/verify_math_reward/std": 0.4556320011615753,
      "step": 789
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0725446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2782.0,
      "completions/mean_length": 848.5178833007812,
      "completions/mean_terminated_length": 594.5030517578125,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 7.382507288629737,
      "grad_norm": 0.14416192471981049,
      "learning_rate": 1e-06,
      "loss": -0.0284,
      "num_tokens": 469334237.0,
      "reward": 0.699776828289032,
      "reward_std": 0.15526191890239716,
      "rewards/verify_math_reward/mean": 0.6997767686843872,
      "rewards/verify_math_reward/std": 0.4586109220981598,
      "step": 790
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3404.0,
      "completions/mean_length": 886.6295166015625,
      "completions/mean_terminated_length": 584.893798828125,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 7.391836734693878,
      "grad_norm": 0.15903300046920776,
      "learning_rate": 1e-06,
      "loss": -0.0207,
      "num_tokens": 469903841.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.13200506567955017,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 791
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3602.0,
      "completions/mean_length": 870.7902221679688,
      "completions/mean_terminated_length": 576.1608276367188,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 7.401166180758017,
      "grad_norm": 0.15618284046649933,
      "learning_rate": 1e-06,
      "loss": -0.0152,
      "num_tokens": 470474541.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.14060944318771362,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 792
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2756.0,
      "completions/mean_length": 954.94873046875,
      "completions/mean_terminated_length": 586.7955322265625,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 7.410495626822158,
      "grad_norm": 0.14724324643611908,
      "learning_rate": 1e-06,
      "loss": -0.059,
      "num_tokens": 471034199.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.12918534874916077,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.4600566029548645,
      "step": 793
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4016.0,
      "completions/mean_length": 923.33935546875,
      "completions/mean_terminated_length": 629.2877807617188,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 7.419825072886297,
      "grad_norm": 0.13723978400230408,
      "learning_rate": 1e-06,
      "loss": -0.0633,
      "num_tokens": 471636063.0,
      "reward": 0.691964328289032,
      "reward_std": 0.15713873505592346,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 794
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2838.0,
      "completions/mean_length": 902.7031860351562,
      "completions/mean_terminated_length": 576.6961669921875,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 7.429154518950437,
      "grad_norm": 0.13166898488998413,
      "learning_rate": 1e-06,
      "loss": -0.051,
      "num_tokens": 472185189.0,
      "reward": 0.7008928656578064,
      "reward_std": 0.1248999685049057,
      "rewards/verify_math_reward/mean": 0.7008928656578064,
      "rewards/verify_math_reward/std": 0.458122581243515,
      "step": 795
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1450892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3947.0,
      "completions/mean_length": 1197.5625,
      "completions/mean_terminated_length": 705.6605834960938,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 7.438483965014577,
      "grad_norm": 0.14812512695789337,
      "learning_rate": 1e-06,
      "loss": -0.0419,
      "num_tokens": 472825341.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.16634789109230042,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 796
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3671.0,
      "completions/mean_length": 1129.048095703125,
      "completions/mean_terminated_length": 674.6499633789062,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 7.447813411078717,
      "grad_norm": 0.17269019782543182,
      "learning_rate": 1e-06,
      "loss": -0.0854,
      "num_tokens": 473436480.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.19043196737766266,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.48841196298599243,
      "step": 797
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3639.0,
      "completions/mean_length": 1082.1038818359375,
      "completions/mean_terminated_length": 660.3117065429688,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 7.457142857142857,
      "grad_norm": 0.16014207899570465,
      "learning_rate": 1e-06,
      "loss": -0.0604,
      "num_tokens": 474051653.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.16878922283649445,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 798
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3215.0,
      "completions/mean_length": 1040.265625,
      "completions/mean_terminated_length": 656.37939453125,
      "completions/min_length": 179.0,
      "completions/min_terminated_length": 179.0,
      "epoch": 7.466472303206997,
      "grad_norm": 0.16901756823062897,
      "learning_rate": 1e-06,
      "loss": -0.0898,
      "num_tokens": 474665787.0,
      "reward": 0.660714328289032,
      "reward_std": 0.162215456366539,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 799
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3149.0,
      "completions/mean_length": 981.9163208007812,
      "completions/mean_terminated_length": 655.5326538085938,
      "completions/min_length": 207.0,
      "completions/min_terminated_length": 207.0,
      "epoch": 7.475801749271137,
      "grad_norm": 0.12940995395183563,
      "learning_rate": 1e-06,
      "loss": -0.0486,
      "num_tokens": 475288656.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.13203758001327515,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 800
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2638.0,
      "completions/mean_length": 1063.0067138671875,
      "completions/mean_terminated_length": 629.721923828125,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 7.485131195335277,
      "grad_norm": 0.15580123662948608,
      "learning_rate": 1e-06,
      "loss": -0.0486,
      "num_tokens": 475865158.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.16495990753173828,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 801
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1741071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3152.0,
      "completions/mean_length": 1256.993408203125,
      "completions/mean_terminated_length": 658.5,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 7.494460641399417,
      "grad_norm": 0.15995453298091888,
      "learning_rate": 1e-06,
      "loss": -0.1085,
      "num_tokens": 476437328.0,
      "reward": 0.621651828289032,
      "reward_std": 0.1811874955892563,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 802
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2366.0,
      "completions/mean_length": 1078.6451416015625,
      "completions/mean_terminated_length": 616.5276489257812,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 7.503790087463557,
      "grad_norm": 0.15283119678497314,
      "learning_rate": 1e-06,
      "loss": -0.0598,
      "num_tokens": 477011042.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.13583439588546753,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 803
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3054.0,
      "completions/mean_length": 1082.6473388671875,
      "completions/mean_terminated_length": 621.1428833007812,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 7.513119533527696,
      "grad_norm": 0.16292116045951843,
      "learning_rate": 1e-06,
      "loss": -0.087,
      "num_tokens": 477585510.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.1931736022233963,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900800228119,
      "step": 804
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2621.0,
      "completions/mean_length": 1074.352783203125,
      "completions/mean_terminated_length": 624.9794921875,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 7.522448979591837,
      "grad_norm": 0.1713329255580902,
      "learning_rate": 1e-06,
      "loss": -0.0794,
      "num_tokens": 478158146.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.1782582849264145,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 805
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4011.0,
      "completions/mean_length": 1094.685302734375,
      "completions/mean_terminated_length": 643.91015625,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 7.531778425655976,
      "grad_norm": 0.1417725533246994,
      "learning_rate": 1e-06,
      "loss": -0.0742,
      "num_tokens": 478739040.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.14011907577514648,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.475953072309494,
      "step": 806
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1272321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3183.0,
      "completions/mean_length": 1092.0614013671875,
      "completions/mean_terminated_length": 654.1470336914062,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 7.541107871720117,
      "grad_norm": 0.15278641879558563,
      "learning_rate": 1e-06,
      "loss": -0.0658,
      "num_tokens": 479338223.0,
      "reward": 0.621651828289032,
      "reward_std": 0.12959763407707214,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 807
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3578.0,
      "completions/mean_length": 996.060302734375,
      "completions/mean_terminated_length": 712.8745727539062,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 7.550437317784256,
      "grad_norm": 0.11239632219076157,
      "learning_rate": 1e-06,
      "loss": -0.0411,
      "num_tokens": 480002885.0,
      "reward": 0.6171875,
      "reward_std": 0.13151581585407257,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 808
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3557.0,
      "completions/mean_length": 1045.708740234375,
      "completions/mean_terminated_length": 705.10546875,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 7.559766763848397,
      "grad_norm": 0.1540139764547348,
      "learning_rate": 1e-06,
      "loss": -0.0256,
      "num_tokens": 480659024.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.18370524048805237,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.4846802353858948,
      "step": 809
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3606.0,
      "completions/mean_length": 1049.35498046875,
      "completions/mean_terminated_length": 618.555419921875,
      "completions/min_length": 192.0,
      "completions/min_terminated_length": 192.0,
      "epoch": 7.569096209912536,
      "grad_norm": 0.15035966038703918,
      "learning_rate": 1e-06,
      "loss": -0.0504,
      "num_tokens": 481246630.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.14207187294960022,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 810
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2692.0,
      "completions/mean_length": 1057.97216796875,
      "completions/mean_terminated_length": 680.6010131835938,
      "completions/min_length": 184.0,
      "completions/min_terminated_length": 184.0,
      "epoch": 7.578425655976677,
      "grad_norm": 0.1580764353275299,
      "learning_rate": 1e-06,
      "loss": -0.0441,
      "num_tokens": 481874621.0,
      "reward": 0.640625,
      "reward_std": 0.16927708685398102,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 811
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4018.0,
      "completions/mean_length": 1048.33935546875,
      "completions/mean_terminated_length": 682.6199951171875,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 7.587755102040816,
      "grad_norm": 0.15379290282726288,
      "learning_rate": 1e-06,
      "loss": -0.0333,
      "num_tokens": 482513693.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.1696154773235321,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 812
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3848.0,
      "completions/mean_length": 1058.646240234375,
      "completions/mean_terminated_length": 629.1605224609375,
      "completions/min_length": 177.0,
      "completions/min_terminated_length": 177.0,
      "epoch": 7.597084548104956,
      "grad_norm": 0.13407360017299652,
      "learning_rate": 1e-06,
      "loss": -0.0602,
      "num_tokens": 483095040.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.1445111334323883,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422144770622253,
      "step": 813
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3925.0,
      "completions/mean_length": 896.4553833007812,
      "completions/mean_terminated_length": 569.810546875,
      "completions/min_length": 185.0,
      "completions/min_terminated_length": 185.0,
      "epoch": 7.606413994169096,
      "grad_norm": 0.2240392565727234,
      "learning_rate": 1e-06,
      "loss": -0.0296,
      "num_tokens": 483647472.0,
      "reward": 0.7444196939468384,
      "reward_std": 0.15161871910095215,
      "rewards/verify_math_reward/mean": 0.7444196343421936,
      "rewards/verify_math_reward/std": 0.43643057346343994,
      "step": 814
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3023.0,
      "completions/mean_length": 1093.907470703125,
      "completions/mean_terminated_length": 625.1935424804688,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 7.615743440233236,
      "grad_norm": 0.1418483555316925,
      "learning_rate": 1e-06,
      "loss": -0.0578,
      "num_tokens": 484217533.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.13403315842151642,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 815
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3518.0,
      "completions/mean_length": 1123.6507568359375,
      "completions/mean_terminated_length": 664.0089721679688,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 7.625072886297376,
      "grad_norm": 0.13304497301578522,
      "learning_rate": 1e-06,
      "loss": -0.0609,
      "num_tokens": 484818956.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.16953739523887634,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.48841196298599243,
      "step": 816
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2673.0,
      "completions/mean_length": 1026.4676513671875,
      "completions/mean_terminated_length": 614.6063232421875,
      "completions/min_length": 180.0,
      "completions/min_terminated_length": 180.0,
      "epoch": 7.634402332361516,
      "grad_norm": 0.13836099207401276,
      "learning_rate": 1e-06,
      "loss": -0.0356,
      "num_tokens": 485390127.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.11907297372817993,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975656390190125,
      "step": 817
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3681.0,
      "completions/mean_length": 1113.1295166015625,
      "completions/mean_terminated_length": 625.0233764648438,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 7.643731778425656,
      "grad_norm": 0.14426197111606598,
      "learning_rate": 1e-06,
      "loss": -0.0532,
      "num_tokens": 485962811.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.13737604022026062,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 818
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3459.0,
      "completions/mean_length": 1091.7132568359375,
      "completions/mean_terminated_length": 688.6063232421875,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 7.653061224489796,
      "grad_norm": 0.1458161324262619,
      "learning_rate": 1e-06,
      "loss": -0.0759,
      "num_tokens": 486604378.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.189790740609169,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 819
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3846.0,
      "completions/mean_length": 922.2756958007812,
      "completions/mean_terminated_length": 619.646728515625,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 7.662390670553936,
      "grad_norm": 0.13000454008579254,
      "learning_rate": 1e-06,
      "loss": -0.0476,
      "num_tokens": 487193817.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.1187373623251915,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613667368888855,
      "step": 820
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4067.0,
      "completions/mean_length": 1062.982177734375,
      "completions/mean_terminated_length": 703.2609252929688,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 7.671720116618076,
      "grad_norm": 0.14187565445899963,
      "learning_rate": 1e-06,
      "loss": -0.0644,
      "num_tokens": 487836217.0,
      "reward": 0.637276828289032,
      "reward_std": 0.16116377711296082,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 821
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3502.0,
      "completions/mean_length": 1070.126220703125,
      "completions/mean_terminated_length": 655.4124145507812,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 7.681049562682215,
      "grad_norm": 0.1762189120054245,
      "learning_rate": 1e-06,
      "loss": -0.0506,
      "num_tokens": 488448978.0,
      "reward": 0.598214328289032,
      "reward_std": 0.18814216554164886,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 822
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2943.0,
      "completions/mean_length": 1025.888427734375,
      "completions/mean_terminated_length": 596.22900390625,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 7.690379008746356,
      "grad_norm": 0.1632724404335022,
      "learning_rate": 1e-06,
      "loss": -0.0484,
      "num_tokens": 489003726.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.15521803498268127,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 823
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3276.0,
      "completions/mean_length": 1104.294677734375,
      "completions/mean_terminated_length": 614.7428588867188,
      "completions/min_length": 187.0,
      "completions/min_terminated_length": 187.0,
      "epoch": 7.699708454810495,
      "grad_norm": 0.14527326822280884,
      "learning_rate": 1e-06,
      "loss": -0.0351,
      "num_tokens": 489564022.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.12482258677482605,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159480571747,
      "step": 824
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4034.0,
      "completions/mean_length": 1017.67529296875,
      "completions/mean_terminated_length": 665.4290771484375,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 7.709037900874636,
      "grad_norm": 0.14603237807750702,
      "learning_rate": 1e-06,
      "loss": -0.0497,
      "num_tokens": 490194075.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.1614982634782791,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 825
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3739.0,
      "completions/mean_length": 993.1105346679688,
      "completions/mean_terminated_length": 642.3490600585938,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 7.718367346938775,
      "grad_norm": 0.17904680967330933,
      "learning_rate": 1e-06,
      "loss": -0.0551,
      "num_tokens": 490805278.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.14695174992084503,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 826
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3780.0,
      "completions/mean_length": 1037.20654296875,
      "completions/mean_terminated_length": 631.1719360351562,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 7.727696793002916,
      "grad_norm": 0.15991447865962982,
      "learning_rate": 1e-06,
      "loss": -0.0338,
      "num_tokens": 491400135.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.17017750442028046,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 827
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3643.0,
      "completions/mean_length": 1124.3248291015625,
      "completions/mean_terminated_length": 682.3833618164062,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 7.737026239067055,
      "grad_norm": 0.12999098002910614,
      "learning_rate": 1e-06,
      "loss": -0.0559,
      "num_tokens": 492025138.0,
      "reward": 0.6015625,
      "reward_std": 0.11396666616201401,
      "rewards/verify_math_reward/mean": 0.6015625,
      "rewards/verify_math_reward/std": 0.48984986543655396,
      "step": 828
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3304.0,
      "completions/mean_length": 972.638427734375,
      "completions/mean_terminated_length": 575.8339233398438,
      "completions/min_length": 84.0,
      "completions/min_terminated_length": 84.0,
      "epoch": 7.746355685131196,
      "grad_norm": 0.14956091344356537,
      "learning_rate": 1e-06,
      "loss": -0.0403,
      "num_tokens": 492573070.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.13771232962608337,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 829
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2985.0,
      "completions/mean_length": 975.2902221679688,
      "completions/mean_terminated_length": 635.410888671875,
      "completions/min_length": 192.0,
      "completions/min_terminated_length": 192.0,
      "epoch": 7.755685131195335,
      "grad_norm": 0.15239976346492767,
      "learning_rate": 1e-06,
      "loss": -0.0317,
      "num_tokens": 493179298.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.14263640344142914,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975659370422363,
      "step": 830
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3021.0,
      "completions/mean_length": 1054.7545166015625,
      "completions/mean_terminated_length": 651.04931640625,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 7.765014577259475,
      "grad_norm": 0.1242513582110405,
      "learning_rate": 1e-06,
      "loss": -0.0688,
      "num_tokens": 493781014.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.13557226955890656,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 831
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2772.0,
      "completions/mean_length": 1100.0592041015625,
      "completions/mean_terminated_length": 672.0675659179688,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 7.774344023323615,
      "grad_norm": 0.14618077874183655,
      "learning_rate": 1e-06,
      "loss": -0.0528,
      "num_tokens": 494389459.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.13508763909339905,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161845445632935,
      "step": 832
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3181.0,
      "completions/mean_length": 951.7355346679688,
      "completions/mean_terminated_length": 613.6007080078125,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 7.783673469387755,
      "grad_norm": 0.15234903991222382,
      "learning_rate": 1e-06,
      "loss": -0.0455,
      "num_tokens": 494974054.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.15811581909656525,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 833
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3760.0,
      "completions/mean_length": 995.435302734375,
      "completions/mean_terminated_length": 605.9170532226562,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 7.793002915451895,
      "grad_norm": 0.13999764621257782,
      "learning_rate": 1e-06,
      "loss": -0.0525,
      "num_tokens": 495553460.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.16266010701656342,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 834
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3498.0,
      "completions/mean_length": 1097.5067138671875,
      "completions/mean_terminated_length": 655.9871826171875,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 7.802332361516035,
      "grad_norm": 0.1538587510585785,
      "learning_rate": 1e-06,
      "loss": -0.0211,
      "num_tokens": 496158426.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.13955636322498322,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 835
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3301.0,
      "completions/mean_length": 1032.2489013671875,
      "completions/mean_terminated_length": 625.5562744140625,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 7.811661807580175,
      "grad_norm": 0.14663149416446686,
      "learning_rate": 1e-06,
      "loss": -0.062,
      "num_tokens": 496753913.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.1385025829076767,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.47942501306533813,
      "step": 836
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3735.0,
      "completions/mean_length": 1041.985595703125,
      "completions/mean_terminated_length": 645.3102416992188,
      "completions/min_length": 179.0,
      "completions/min_terminated_length": 179.0,
      "epoch": 7.820991253644315,
      "grad_norm": 0.13347220420837402,
      "learning_rate": 1e-06,
      "loss": -0.065,
      "num_tokens": 497351548.0,
      "reward": 0.6930803656578064,
      "reward_std": 0.14684508740901947,
      "rewards/verify_math_reward/mean": 0.6930803656578064,
      "rewards/verify_math_reward/std": 0.46147334575653076,
      "step": 837
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4092.0,
      "completions/mean_length": 1015.90185546875,
      "completions/mean_terminated_length": 620.2216186523438,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 7.830320699708455,
      "grad_norm": 0.14678119122982025,
      "learning_rate": 1e-06,
      "loss": -0.0327,
      "num_tokens": 497931148.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.13516180217266083,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 838
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3982.0,
      "completions/mean_length": 986.7745971679688,
      "completions/mean_terminated_length": 596.1683349609375,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 7.839650145772595,
      "grad_norm": 0.16466213762760162,
      "learning_rate": 1e-06,
      "loss": -0.0646,
      "num_tokens": 498500978.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.16848431527614594,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 839
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2018.0,
      "completions/mean_length": 944.2288208007812,
      "completions/mean_terminated_length": 618.1834716796875,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 7.848979591836734,
      "grad_norm": 0.13148349523544312,
      "learning_rate": 1e-06,
      "loss": -0.047,
      "num_tokens": 499087823.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.14383850991725922,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 840
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3220.0,
      "completions/mean_length": 858.7857666015625,
      "completions/mean_terminated_length": 571.6452026367188,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 7.858309037900875,
      "grad_norm": 0.14904271066188812,
      "learning_rate": 1e-06,
      "loss": -0.0629,
      "num_tokens": 499655687.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.1356482356786728,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613664388656616,
      "step": 841
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2659.0,
      "completions/mean_length": 964.7120971679688,
      "completions/mean_terminated_length": 657.7230834960938,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 7.867638483965014,
      "grad_norm": 0.12897950410842896,
      "learning_rate": 1e-06,
      "loss": -0.0339,
      "num_tokens": 500276957.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.15409217774868011,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975659370422363,
      "step": 842
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3099.0,
      "completions/mean_length": 925.7645263671875,
      "completions/mean_terminated_length": 576.1350708007812,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 7.876967930029155,
      "grad_norm": 0.14820946753025055,
      "learning_rate": 1e-06,
      "loss": -0.0341,
      "num_tokens": 500831066.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.146052747964859,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 843
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3196.0,
      "completions/mean_length": 905.3660888671875,
      "completions/mean_terminated_length": 566.607421875,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 7.886297376093294,
      "grad_norm": 0.14712035655975342,
      "learning_rate": 1e-06,
      "loss": -0.0541,
      "num_tokens": 501373170.0,
      "reward": 0.7098214626312256,
      "reward_std": 0.13688749074935913,
      "rewards/verify_math_reward/mean": 0.7098214030265808,
      "rewards/verify_math_reward/std": 0.454098105430603,
      "step": 844
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2908.0,
      "completions/mean_length": 911.32373046875,
      "completions/mean_terminated_length": 637.2484741210938,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 7.895626822157435,
      "grad_norm": 0.1415589302778244,
      "learning_rate": 1e-06,
      "loss": -0.0431,
      "num_tokens": 501996252.0,
      "reward": 0.676339328289032,
      "reward_std": 0.1504133939743042,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335985660553,
      "step": 845
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3235.0,
      "completions/mean_length": 1041.485595703125,
      "completions/mean_terminated_length": 609.5732421875,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 7.904956268221574,
      "grad_norm": 0.1677154153585434,
      "learning_rate": 1e-06,
      "loss": -0.0498,
      "num_tokens": 502571431.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.16217337548732758,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 846
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3534.0,
      "completions/mean_length": 983.2176513671875,
      "completions/mean_terminated_length": 644.2017211914062,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 7.914285714285715,
      "grad_norm": 0.14427697658538818,
      "learning_rate": 1e-06,
      "loss": -0.0399,
      "num_tokens": 503192778.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.15485143661499023,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 847
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3536.0,
      "completions/mean_length": 1031.899658203125,
      "completions/mean_terminated_length": 611.9467163085938,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 7.923615160349854,
      "grad_norm": 0.15892471373081207,
      "learning_rate": 1e-06,
      "loss": -0.0866,
      "num_tokens": 503766520.0,
      "reward": 0.637276828289032,
      "reward_std": 0.1653289794921875,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 848
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3217.0,
      "completions/mean_length": 968.3147583007812,
      "completions/mean_terminated_length": 619.0694580078125,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 7.932944606413994,
      "grad_norm": 0.15579116344451904,
      "learning_rate": 1e-06,
      "loss": -0.0589,
      "num_tokens": 504356722.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.14910070598125458,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900800228119,
      "step": 849
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1439732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3780.0,
      "completions/mean_length": 1141.2857666015625,
      "completions/mean_terminated_length": 644.3389892578125,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 7.942274052478134,
      "grad_norm": 0.19498465955257416,
      "learning_rate": 1e-06,
      "loss": -0.0861,
      "num_tokens": 504945706.0,
      "reward": 0.625,
      "reward_std": 0.18036307394504547,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 850
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3677.0,
      "completions/mean_length": 994.5078735351562,
      "completions/mean_terminated_length": 622.3287353515625,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 7.9516034985422746,
      "grad_norm": 0.15794239938259125,
      "learning_rate": 1e-06,
      "loss": -0.0805,
      "num_tokens": 505529721.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.16292154788970947,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 851
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3753.0,
      "completions/mean_length": 1146.546875,
      "completions/mean_terminated_length": 686.0516357421875,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 7.960932944606414,
      "grad_norm": 0.14056451618671417,
      "learning_rate": 1e-06,
      "loss": -0.059,
      "num_tokens": 506150227.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.16273680329322815,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 852
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2712.0,
      "completions/mean_length": 935.69873046875,
      "completions/mean_terminated_length": 642.7926635742188,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 7.970262390670554,
      "grad_norm": 0.15269824862480164,
      "learning_rate": 1e-06,
      "loss": -0.0557,
      "num_tokens": 506758189.0,
      "reward": 0.7165178656578064,
      "reward_std": 0.1716417372226715,
      "rewards/verify_math_reward/mean": 0.7165178656578064,
      "rewards/verify_math_reward/std": 0.4509401023387909,
      "step": 853
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3916.0,
      "completions/mean_length": 1038.5748291015625,
      "completions/mean_terminated_length": 615.119384765625,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 7.979591836734694,
      "grad_norm": 0.1502271592617035,
      "learning_rate": 1e-06,
      "loss": -0.0767,
      "num_tokens": 507327736.0,
      "reward": 0.684151828289032,
      "reward_std": 0.14451251924037933,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 854
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3193.0,
      "completions/mean_length": 1061.78466796875,
      "completions/mean_terminated_length": 632.74267578125,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 7.988921282798834,
      "grad_norm": 0.1559712439775467,
      "learning_rate": 1e-06,
      "loss": -0.0677,
      "num_tokens": 507925551.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.1562378853559494,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 855
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.11931818181818177,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3109.0,
      "completions/mean_length": 1164.28125,
      "completions/mean_terminated_length": 767.0806274414062,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 7.998250728862974,
      "grad_norm": 0.1355619877576828,
      "learning_rate": 1e-06,
      "loss": -0.0392,
      "num_tokens": 508519647.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.1394055187702179,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 856
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3079.0,
      "completions/mean_length": 1112.3404541015625,
      "completions/mean_terminated_length": 681.7484130859375,
      "completions/min_length": 86.0,
      "completions/min_terminated_length": 86.0,
      "epoch": 8.00932944606414,
      "grad_norm": 0.1278090476989746,
      "learning_rate": 1e-06,
      "loss": -0.0624,
      "num_tokens": 509147872.0,
      "reward": 0.6328125,
      "reward_std": 0.14214785397052765,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 857
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3949.0,
      "completions/mean_length": 972.8370971679688,
      "completions/mean_terminated_length": 628.3990478515625,
      "completions/min_length": 212.0,
      "completions/min_terminated_length": 212.0,
      "epoch": 8.018658892128279,
      "grad_norm": 0.14049650728702545,
      "learning_rate": 1e-06,
      "loss": -0.0661,
      "num_tokens": 509750846.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.14921018481254578,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.4610042870044708,
      "step": 858
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4050.0,
      "completions/mean_length": 1124.654052734375,
      "completions/mean_terminated_length": 660.7406616210938,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 8.02798833819242,
      "grad_norm": 0.1408451795578003,
      "learning_rate": 1e-06,
      "loss": -0.0535,
      "num_tokens": 510347512.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.14150846004486084,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 859
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3762.0,
      "completions/mean_length": 1014.8984985351562,
      "completions/mean_terminated_length": 679.3328857421875,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 8.03731778425656,
      "grad_norm": 0.14122828841209412,
      "learning_rate": 1e-06,
      "loss": -0.0615,
      "num_tokens": 510976981.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.15724678337574005,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 860
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3877.0,
      "completions/mean_length": 976.6160888671875,
      "completions/mean_terminated_length": 623.9900512695312,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 8.0466472303207,
      "grad_norm": 0.1603347212076187,
      "learning_rate": 1e-06,
      "loss": -0.0626,
      "num_tokens": 511564341.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.14984887838363647,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 861
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3578.0,
      "completions/mean_length": 1054.18310546875,
      "completions/mean_terminated_length": 619.6377563476562,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 8.055976676384839,
      "grad_norm": 0.1483280211687088,
      "learning_rate": 1e-06,
      "loss": -0.0766,
      "num_tokens": 512131569.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.15158231556415558,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 862
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3436.0,
      "completions/mean_length": 1040.1484375,
      "completions/mean_terminated_length": 660.5633544921875,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 8.06530612244898,
      "grad_norm": 0.13730108737945557,
      "learning_rate": 1e-06,
      "loss": -0.0416,
      "num_tokens": 512762374.0,
      "reward": 0.6484375,
      "reward_std": 0.14564156532287598,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 863
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2843.0,
      "completions/mean_length": 981.15185546875,
      "completions/mean_terminated_length": 646.1804809570312,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 8.07463556851312,
      "grad_norm": 0.1306883692741394,
      "learning_rate": 1e-06,
      "loss": -0.0273,
      "num_tokens": 513371022.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.12989820539951324,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422144770622253,
      "step": 864
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3030.0,
      "completions/mean_length": 1080.7410888671875,
      "completions/mean_terminated_length": 605.4677124023438,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 8.08396501457726,
      "grad_norm": 0.1641305387020111,
      "learning_rate": 1e-06,
      "loss": -0.0549,
      "num_tokens": 513937078.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.16386516392230988,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644899368286,
      "step": 865
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4046.0,
      "completions/mean_length": 916.4085083007812,
      "completions/mean_terminated_length": 600.4000244140625,
      "completions/min_length": 194.0,
      "completions/min_terminated_length": 194.0,
      "epoch": 8.093294460641399,
      "grad_norm": 0.16136740148067474,
      "learning_rate": 1e-06,
      "loss": -0.0102,
      "num_tokens": 514512044.0,
      "reward": 0.65625,
      "reward_std": 0.15721426904201508,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 866
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3194.0,
      "completions/mean_length": 1137.96875,
      "completions/mean_terminated_length": 662.8445434570312,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 8.102623906705539,
      "grad_norm": 0.1663280725479126,
      "learning_rate": 1e-06,
      "loss": -0.0662,
      "num_tokens": 515104464.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.17161037027835846,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410362005233765,
      "step": 867
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3849.0,
      "completions/mean_length": 1049.0,
      "completions/mean_terminated_length": 640.1620483398438,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 8.11195335276968,
      "grad_norm": 0.1567470133304596,
      "learning_rate": 1e-06,
      "loss": -0.0548,
      "num_tokens": 515707992.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.17156578600406647,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 868
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3279.0,
      "completions/mean_length": 955.7210083007812,
      "completions/mean_terminated_length": 639.3783569335938,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 8.12128279883382,
      "grad_norm": 0.13543248176574707,
      "learning_rate": 1e-06,
      "loss": -0.0483,
      "num_tokens": 516310606.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.16514535248279572,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 869
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3671.0,
      "completions/mean_length": 1006.14404296875,
      "completions/mean_terminated_length": 631.030029296875,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 8.130612244897959,
      "grad_norm": 0.14321519434452057,
      "learning_rate": 1e-06,
      "loss": -0.0334,
      "num_tokens": 516900183.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.15312324464321136,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 870
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4047.0,
      "completions/mean_length": 1002.904052734375,
      "completions/mean_terminated_length": 618.6925659179688,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 8.139941690962099,
      "grad_norm": 0.140425443649292,
      "learning_rate": 1e-06,
      "loss": -0.0255,
      "num_tokens": 517488593.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.12050722539424896,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 871
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3419.0,
      "completions/mean_length": 902.1406860351562,
      "completions/mean_terminated_length": 618.845703125,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 8.14927113702624,
      "grad_norm": 0.135789155960083,
      "learning_rate": 1e-06,
      "loss": -0.0351,
      "num_tokens": 518094303.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.13771162927150726,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 872
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3545.0,
      "completions/mean_length": 953.4230346679688,
      "completions/mean_terminated_length": 611.162109375,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 8.15860058309038,
      "grad_norm": 0.1614873856306076,
      "learning_rate": 1e-06,
      "loss": -0.0537,
      "num_tokens": 518680834.0,
      "reward": 0.6975446939468384,
      "reward_std": 0.17351828515529633,
      "rewards/verify_math_reward/mean": 0.6975446343421936,
      "rewards/verify_math_reward/std": 0.45957788825035095,
      "step": 873
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3688.0,
      "completions/mean_length": 1118.2332763671875,
      "completions/mean_terminated_length": 675.3859252929688,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 8.167930029154519,
      "grad_norm": 0.16200335323810577,
      "learning_rate": 1e-06,
      "loss": -0.1006,
      "num_tokens": 519300195.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.18185940384864807,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 874
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4001.0,
      "completions/mean_length": 974.4152221679688,
      "completions/mean_terminated_length": 595.4493408203125,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 8.177259475218658,
      "grad_norm": 0.1391468197107315,
      "learning_rate": 1e-06,
      "loss": -0.0429,
      "num_tokens": 519856455.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.13981597125530243,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 875
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3661.0,
      "completions/mean_length": 950.3973388671875,
      "completions/mean_terminated_length": 559.6637573242188,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 8.186588921282798,
      "grad_norm": 0.12101931124925613,
      "learning_rate": 1e-06,
      "loss": -0.0314,
      "num_tokens": 520395035.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.09866905957460403,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 876
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3659.0,
      "completions/mean_length": 977.36279296875,
      "completions/mean_terminated_length": 620.5037231445312,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 8.19591836734694,
      "grad_norm": 0.13639773428440094,
      "learning_rate": 1e-06,
      "loss": -0.0493,
      "num_tokens": 520993640.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.1573241800069809,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 877
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1551339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3126.0,
      "completions/mean_length": 1146.048095703125,
      "completions/mean_terminated_length": 604.379150390625,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 8.205247813411079,
      "grad_norm": 0.1504117250442505,
      "learning_rate": 1e-06,
      "loss": -0.0784,
      "num_tokens": 521544475.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.14917626976966858,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 878
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3926.0,
      "completions/mean_length": 952.1172485351562,
      "completions/mean_terminated_length": 618.322265625,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 8.214577259475218,
      "grad_norm": 0.14028151333332062,
      "learning_rate": 1e-06,
      "loss": -0.0182,
      "num_tokens": 522137692.0,
      "reward": 0.676339328289032,
      "reward_std": 0.12715697288513184,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335985660553,
      "step": 879
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3499.0,
      "completions/mean_length": 1094.4888916015625,
      "completions/mean_terminated_length": 678.777587890625,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 8.223906705539358,
      "grad_norm": 0.14301654696464539,
      "learning_rate": 1e-06,
      "loss": -0.0583,
      "num_tokens": 522761042.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.16991718113422394,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938119411468506,
      "step": 880
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3660.0,
      "completions/mean_length": 1016.747802734375,
      "completions/mean_terminated_length": 594.71826171875,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 8.2332361516035,
      "grad_norm": 0.14127644896507263,
      "learning_rate": 1e-06,
      "loss": -0.0644,
      "num_tokens": 523325104.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.16901704668998718,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 881
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3782.0,
      "completions/mean_length": 1095.1585693359375,
      "completions/mean_terminated_length": 631.1107788085938,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 8.242565597667639,
      "grad_norm": 0.15452386438846588,
      "learning_rate": 1e-06,
      "loss": -0.0646,
      "num_tokens": 523918358.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.15357083082199097,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 882
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2608.0,
      "completions/mean_length": 1002.552490234375,
      "completions/mean_terminated_length": 605.1574096679688,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 8.251895043731778,
      "grad_norm": 0.14762280881404877,
      "learning_rate": 1e-06,
      "loss": -0.0523,
      "num_tokens": 524488565.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.14203837513923645,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 883
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3903.0,
      "completions/mean_length": 982.7678833007812,
      "completions/mean_terminated_length": 626.52734375,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 8.261224489795918,
      "grad_norm": 0.1296025514602661,
      "learning_rate": 1e-06,
      "loss": -0.0472,
      "num_tokens": 525075093.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.11712156236171722,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 884
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3271.0,
      "completions/mean_length": 1014.65966796875,
      "completions/mean_terminated_length": 574.4680786132812,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 8.270553935860057,
      "grad_norm": 0.1451510339975357,
      "learning_rate": 1e-06,
      "loss": -0.0576,
      "num_tokens": 525622916.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.1388377547264099,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 885
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2806.0,
      "completions/mean_length": 1030.2757568359375,
      "completions/mean_terminated_length": 605.6708984375,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 8.279883381924199,
      "grad_norm": 0.16212977468967438,
      "learning_rate": 1e-06,
      "loss": -0.0441,
      "num_tokens": 526190363.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.14740821719169617,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 886
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2863.0,
      "completions/mean_length": 972.911865234375,
      "completions/mean_terminated_length": 606.8641357421875,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 8.289212827988338,
      "grad_norm": 0.15837009251117706,
      "learning_rate": 1e-06,
      "loss": -0.0716,
      "num_tokens": 526761604.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.17513476312160492,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 887
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3496.0,
      "completions/mean_length": 937.294677734375,
      "completions/mean_terminated_length": 623.3619384765625,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 8.298542274052478,
      "grad_norm": 0.13528120517730713,
      "learning_rate": 1e-06,
      "loss": -0.0371,
      "num_tokens": 527359644.0,
      "reward": 0.6595982313156128,
      "reward_std": 0.13079974055290222,
      "rewards/verify_math_reward/mean": 0.6595982313156128,
      "rewards/verify_math_reward/std": 0.4741089344024658,
      "step": 888
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2403.0,
      "completions/mean_length": 941.3281860351562,
      "completions/mean_terminated_length": 584.7130126953125,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 8.307871720116617,
      "grad_norm": 0.1483733206987381,
      "learning_rate": 1e-06,
      "loss": -0.0509,
      "num_tokens": 527916826.0,
      "reward": 0.7254464626312256,
      "reward_std": 0.15056565403938293,
      "rewards/verify_math_reward/mean": 0.7254464030265808,
      "rewards/verify_math_reward/std": 0.4465382993221283,
      "step": 889
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2832.0,
      "completions/mean_length": 1013.2500610351562,
      "completions/mean_terminated_length": 604.035400390625,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 8.317201166180759,
      "grad_norm": 0.1543199121952057,
      "learning_rate": 1e-06,
      "loss": -0.036,
      "num_tokens": 528488210.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.14376434683799744,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 890
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3425.0,
      "completions/mean_length": 1051.4676513671875,
      "completions/mean_terminated_length": 620.9668579101562,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 8.326530612244898,
      "grad_norm": 0.14981353282928467,
      "learning_rate": 1e-06,
      "loss": -0.0488,
      "num_tokens": 529071613.0,
      "reward": 0.640625,
      "reward_std": 0.1513124257326126,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 891
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3392.0,
      "completions/mean_length": 965.3270263671875,
      "completions/mean_terminated_length": 576.4479370117188,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 8.335860058309038,
      "grad_norm": 0.14016583561897278,
      "learning_rate": 1e-06,
      "loss": -0.0434,
      "num_tokens": 529619026.0,
      "reward": 0.621651828289032,
      "reward_std": 0.13049665093421936,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.485245943069458,
      "step": 892
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2651.0,
      "completions/mean_length": 917.7388916015625,
      "completions/mean_terminated_length": 597.5700073242188,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 8.345189504373177,
      "grad_norm": 0.15088114142417908,
      "learning_rate": 1e-06,
      "loss": -0.0448,
      "num_tokens": 530186584.0,
      "reward": 0.6640625,
      "reward_std": 0.16758601367473602,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 893
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3594.0,
      "completions/mean_length": 989.333740234375,
      "completions/mean_terminated_length": 563.5469360351562,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 8.354518950437317,
      "grad_norm": 0.16835367679595947,
      "learning_rate": 1e-06,
      "loss": -0.0282,
      "num_tokens": 530724155.0,
      "reward": 0.65625,
      "reward_std": 0.14203977584838867,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 894
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3645.0,
      "completions/mean_length": 1058.7076416015625,
      "completions/mean_terminated_length": 638.0406494140625,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 8.363848396501458,
      "grad_norm": 0.14951688051223755,
      "learning_rate": 1e-06,
      "loss": -0.0692,
      "num_tokens": 531309765.0,
      "reward": 0.6171875,
      "reward_std": 0.16728290915489197,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 895
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2943.0,
      "completions/mean_length": 1047.4296875,
      "completions/mean_terminated_length": 616.3579711914062,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 8.373177842565598,
      "grad_norm": 0.1584387719631195,
      "learning_rate": 1e-06,
      "loss": -0.0682,
      "num_tokens": 531876990.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.15289753675460815,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 896
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2528.0,
      "completions/mean_length": 884.5670166015625,
      "completions/mean_terminated_length": 534.8069458007812,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 8.382507288629737,
      "grad_norm": 0.15818488597869873,
      "learning_rate": 1e-06,
      "loss": -0.05,
      "num_tokens": 532409834.0,
      "reward": 0.676339328289032,
      "reward_std": 0.15349414944648743,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335985660553,
      "step": 897
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3117.0,
      "completions/mean_length": 955.9386596679688,
      "completions/mean_terminated_length": 587.9014892578125,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 8.391836734693877,
      "grad_norm": 0.16838958859443665,
      "learning_rate": 1e-06,
      "loss": -0.0675,
      "num_tokens": 532958347.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.1521807461977005,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.4600565433502197,
      "step": 898
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3049.0,
      "completions/mean_length": 889.87060546875,
      "completions/mean_terminated_length": 575.5441284179688,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 8.401166180758018,
      "grad_norm": 0.1707538217306137,
      "learning_rate": 1e-06,
      "loss": -0.0181,
      "num_tokens": 533520095.0,
      "reward": 0.7008928656578064,
      "reward_std": 0.16067594289779663,
      "rewards/verify_math_reward/mean": 0.7008928656578064,
      "rewards/verify_math_reward/std": 0.458122581243515,
      "step": 899
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3994.0,
      "completions/mean_length": 896.6998291015625,
      "completions/mean_terminated_length": 557.02099609375,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 8.410495626822158,
      "grad_norm": 0.16008999943733215,
      "learning_rate": 1e-06,
      "loss": -0.0336,
      "num_tokens": 534051682.0,
      "reward": 0.723214328289032,
      "reward_std": 0.16134603321552277,
      "rewards/verify_math_reward/mean": 0.7232142686843872,
      "rewards/verify_math_reward/std": 0.44765952229499817,
      "step": 900
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3946.0,
      "completions/mean_length": 1003.747802734375,
      "completions/mean_terminated_length": 602.10595703125,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 8.419825072886297,
      "grad_norm": 0.1635473221540451,
      "learning_rate": 1e-06,
      "loss": -0.067,
      "num_tokens": 534612272.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.17156507074832916,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.4610042870044708,
      "step": 901
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2206.0,
      "completions/mean_length": 797.075927734375,
      "completions/mean_terminated_length": 547.577392578125,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 8.429154518950437,
      "grad_norm": 0.13711099326610565,
      "learning_rate": 1e-06,
      "loss": -0.0672,
      "num_tokens": 535149532.0,
      "reward": 0.738839328289032,
      "reward_std": 0.14083515107631683,
      "rewards/verify_math_reward/mean": 0.7388392686843872,
      "rewards/verify_math_reward/std": 0.439512699842453,
      "step": 902
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2247.0,
      "completions/mean_length": 882.3504638671875,
      "completions/mean_terminated_length": 562.95703125,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 8.438483965014576,
      "grad_norm": 0.12448661029338837,
      "learning_rate": 1e-06,
      "loss": -0.0397,
      "num_tokens": 535695774.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.11032027006149292,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.4600565731525421,
      "step": 903
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3531.0,
      "completions/mean_length": 979.6038208007812,
      "completions/mean_terminated_length": 644.4660034179688,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 8.447813411078718,
      "grad_norm": 0.12877033650875092,
      "learning_rate": 1e-06,
      "loss": -0.0409,
      "num_tokens": 536313947.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.11580956727266312,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 904
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3182.0,
      "completions/mean_length": 972.34716796875,
      "completions/mean_terminated_length": 588.7406005859375,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 8.457142857142857,
      "grad_norm": 0.14431960880756378,
      "learning_rate": 1e-06,
      "loss": -0.0484,
      "num_tokens": 536879626.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.15484780073165894,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900800228119,
      "step": 905
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4038.0,
      "completions/mean_length": 951.5614013671875,
      "completions/mean_terminated_length": 591.75,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 8.466472303206997,
      "grad_norm": 0.14249777793884277,
      "learning_rate": 1e-06,
      "loss": -0.03,
      "num_tokens": 537441313.0,
      "reward": 0.7165178656578064,
      "reward_std": 0.11535493284463882,
      "rewards/verify_math_reward/mean": 0.7165178656578064,
      "rewards/verify_math_reward/std": 0.4509401023387909,
      "step": 906
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2735.0,
      "completions/mean_length": 1031.3248291015625,
      "completions/mean_terminated_length": 597.975830078125,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 8.475801749271136,
      "grad_norm": 0.14390555024147034,
      "learning_rate": 1e-06,
      "loss": -0.0502,
      "num_tokens": 537996172.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.14083333313465118,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 907
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3831.0,
      "completions/mean_length": 1109.84375,
      "completions/mean_terminated_length": 652.5045166015625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 8.485131195335278,
      "grad_norm": 0.1546357423067093,
      "learning_rate": 1e-06,
      "loss": -0.0697,
      "num_tokens": 538586400.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.15049009025096893,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 908
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3880.0,
      "completions/mean_length": 847.724365234375,
      "completions/mean_terminated_length": 597.8569946289062,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 8.494460641399417,
      "grad_norm": 0.15690870583057404,
      "learning_rate": 1e-06,
      "loss": -0.036,
      "num_tokens": 539169873.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.15018586814403534,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.4610042870044708,
      "step": 909
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3696.0,
      "completions/mean_length": 960.2567138671875,
      "completions/mean_terminated_length": 605.7813720703125,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 8.503790087463557,
      "grad_norm": 0.151783287525177,
      "learning_rate": 1e-06,
      "loss": -0.0513,
      "num_tokens": 539757591.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.15311436355113983,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 910
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.056919642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3717.0,
      "completions/mean_length": 747.2422485351562,
      "completions/mean_terminated_length": 545.1278076171875,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 8.513119533527696,
      "grad_norm": 0.15444251894950867,
      "learning_rate": 1e-06,
      "loss": -0.0229,
      "num_tokens": 540307080.0,
      "reward": 0.7109375596046448,
      "reward_std": 0.12384940683841705,
      "rewards/verify_math_reward/mean": 0.7109375,
      "rewards/verify_math_reward/std": 0.45358020067214966,
      "step": 911
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3878.0,
      "completions/mean_length": 1106.2132568359375,
      "completions/mean_terminated_length": 652.7493286132812,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 8.522448979591836,
      "grad_norm": 0.1587335467338562,
      "learning_rate": 1e-06,
      "loss": -0.073,
      "num_tokens": 540909119.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.15424413979053497,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 912
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3861.0,
      "completions/mean_length": 995.3114013671875,
      "completions/mean_terminated_length": 610.1568603515625,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 8.531778425655977,
      "grad_norm": 0.14729638397693634,
      "learning_rate": 1e-06,
      "loss": -0.0567,
      "num_tokens": 541480702.0,
      "reward": 0.6484375,
      "reward_std": 0.1466161608695984,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 913
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2520.0,
      "completions/mean_length": 1001.39404296875,
      "completions/mean_terminated_length": 536.607177734375,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 8.541107871720117,
      "grad_norm": 0.15659281611442566,
      "learning_rate": 1e-06,
      "loss": -0.0842,
      "num_tokens": 541992983.0,
      "reward": 0.6796875596046448,
      "reward_std": 0.13008618354797363,
      "rewards/verify_math_reward/mean": 0.6796875,
      "rewards/verify_math_reward/std": 0.4668572247028351,
      "step": 914
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2784.0,
      "completions/mean_length": 837.9933471679688,
      "completions/mean_terminated_length": 587.37744140625,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 8.550437317784256,
      "grad_norm": 0.14385898411273956,
      "learning_rate": 1e-06,
      "loss": -0.0462,
      "num_tokens": 542570257.0,
      "reward": 0.7131696939468384,
      "reward_std": 0.14823377132415771,
      "rewards/verify_math_reward/mean": 0.7131696343421936,
      "rewards/verify_math_reward/std": 0.4525342881679535,
      "step": 915
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3764.0,
      "completions/mean_length": 1045.6820068359375,
      "completions/mean_terminated_length": 705.07568359375,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 8.559766763848396,
      "grad_norm": 0.12489776313304901,
      "learning_rate": 1e-06,
      "loss": -0.0436,
      "num_tokens": 543237564.0,
      "reward": 0.582589328289032,
      "reward_std": 0.12140624225139618,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.4934072494506836,
      "step": 916
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3977.0,
      "completions/mean_length": 1061.5279541015625,
      "completions/mean_terminated_length": 605.7728271484375,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 8.569096209912537,
      "grad_norm": 0.15697191655635834,
      "learning_rate": 1e-06,
      "loss": -0.0465,
      "num_tokens": 543797869.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.15473198890686035,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 917
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3649.0,
      "completions/mean_length": 945.2600708007812,
      "completions/mean_terminated_length": 589.0894165039062,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 8.578425655976677,
      "grad_norm": 0.14893698692321777,
      "learning_rate": 1e-06,
      "loss": -0.0396,
      "num_tokens": 544366062.0,
      "reward": 0.645089328289032,
      "reward_std": 0.15331122279167175,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 918
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3401.0,
      "completions/mean_length": 813.224365234375,
      "completions/mean_terminated_length": 560.703125,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 8.587755102040816,
      "grad_norm": 0.16182899475097656,
      "learning_rate": 1e-06,
      "loss": -0.0379,
      "num_tokens": 544928087.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.16401740908622742,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.4600566029548645,
      "step": 919
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3169.0,
      "completions/mean_length": 995.1763916015625,
      "completions/mean_terminated_length": 670.1824951171875,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 8.597084548104956,
      "grad_norm": 0.14866846799850464,
      "learning_rate": 1e-06,
      "loss": -0.0434,
      "num_tokens": 545554333.0,
      "reward": 0.606026828289032,
      "reward_std": 0.1726934313774109,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890191316604614,
      "step": 920
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2508.0,
      "completions/mean_length": 943.3449096679688,
      "completions/mean_terminated_length": 565.0262451171875,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 8.606413994169095,
      "grad_norm": 0.1682433933019638,
      "learning_rate": 1e-06,
      "loss": -0.0718,
      "num_tokens": 546089034.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.16533967852592468,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.4610042870044708,
      "step": 921
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2486.0,
      "completions/mean_length": 928.1741333007812,
      "completions/mean_terminated_length": 626.1076049804688,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 8.615743440233237,
      "grad_norm": 0.15417876839637756,
      "learning_rate": 1e-06,
      "loss": -0.0661,
      "num_tokens": 546685302.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.1768607497215271,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 922
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2817.0,
      "completions/mean_length": 1046.7723388671875,
      "completions/mean_terminated_length": 593.2974243164062,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 8.625072886297376,
      "grad_norm": 0.15682968497276306,
      "learning_rate": 1e-06,
      "loss": -0.0738,
      "num_tokens": 547244258.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.17427602410316467,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 923
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3816.0,
      "completions/mean_length": 911.6105346679688,
      "completions/mean_terminated_length": 564.7957763671875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 8.634402332361516,
      "grad_norm": 0.1625637263059616,
      "learning_rate": 1e-06,
      "loss": -0.0684,
      "num_tokens": 547792933.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.1560874581336975,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 924
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3288.0,
      "completions/mean_length": 1000.6172485351562,
      "completions/mean_terminated_length": 607.3673095703125,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 8.643731778425655,
      "grad_norm": 0.16285686194896698,
      "learning_rate": 1e-06,
      "loss": -0.0424,
      "num_tokens": 548367766.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.1533093899488449,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 925
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3667.0,
      "completions/mean_length": 969.62841796875,
      "completions/mean_terminated_length": 590.0813598632812,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 8.653061224489797,
      "grad_norm": 0.1668129414319992,
      "learning_rate": 1e-06,
      "loss": -0.0534,
      "num_tokens": 548927121.0,
      "reward": 0.676339328289032,
      "reward_std": 0.17145879566669464,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335985660553,
      "step": 926
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2468.0,
      "completions/mean_length": 864.677490234375,
      "completions/mean_terminated_length": 539.1633911132812,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 8.662390670553936,
      "grad_norm": 0.15278904139995575,
      "learning_rate": 1e-06,
      "loss": -0.0245,
      "num_tokens": 549457216.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.123512402176857,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 927
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2918.0,
      "completions/mean_length": 1068.4364013671875,
      "completions/mean_terminated_length": 604.7554931640625,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 8.671720116618076,
      "grad_norm": 0.14968419075012207,
      "learning_rate": 1e-06,
      "loss": -0.0584,
      "num_tokens": 550016215.0,
      "reward": 0.6171875,
      "reward_std": 0.15240898728370667,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 928
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3248.0,
      "completions/mean_length": 992.3895263671875,
      "completions/mean_terminated_length": 602.4887084960938,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 8.681049562682215,
      "grad_norm": 0.14219050109386444,
      "learning_rate": 1e-06,
      "loss": -0.0423,
      "num_tokens": 550584492.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.14894986152648926,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 929
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4003.0,
      "completions/mean_length": 1016.3058471679688,
      "completions/mean_terminated_length": 585.3053588867188,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 8.690379008746355,
      "grad_norm": 0.146462082862854,
      "learning_rate": 1e-06,
      "loss": -0.105,
      "num_tokens": 551130854.0,
      "reward": 0.691964328289032,
      "reward_std": 0.1449248045682907,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 930
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3965.0,
      "completions/mean_length": 1116.10498046875,
      "completions/mean_terminated_length": 659.724609375,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 8.699708454810496,
      "grad_norm": 0.16851374506950378,
      "learning_rate": 1e-06,
      "loss": -0.0629,
      "num_tokens": 551736900.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.18144892156124115,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 931
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3786.0,
      "completions/mean_length": 1011.4308471679688,
      "completions/mean_terminated_length": 615.17626953125,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 8.709037900874636,
      "grad_norm": 0.14060620963573456,
      "learning_rate": 1e-06,
      "loss": -0.0586,
      "num_tokens": 552318958.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.12877096235752106,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 932
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3944.0,
      "completions/mean_length": 1045.56591796875,
      "completions/mean_terminated_length": 645.0037841796875,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 8.718367346938775,
      "grad_norm": 0.14821182191371918,
      "learning_rate": 1e-06,
      "loss": -0.0495,
      "num_tokens": 552918489.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.14797163009643555,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 933
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3940.0,
      "completions/mean_length": 1086.122802734375,
      "completions/mean_terminated_length": 620.6777954101562,
      "completions/min_length": 180.0,
      "completions/min_terminated_length": 180.0,
      "epoch": 8.727696793002915,
      "grad_norm": 0.15176919102668762,
      "learning_rate": 1e-06,
      "loss": -0.0384,
      "num_tokens": 553486303.0,
      "reward": 0.59375,
      "reward_std": 0.1425604224205017,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 934
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3582.0,
      "completions/mean_length": 999.4006958007812,
      "completions/mean_terminated_length": 566.0343627929688,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 8.737026239067056,
      "grad_norm": 0.1681135594844818,
      "learning_rate": 1e-06,
      "loss": -0.0458,
      "num_tokens": 554010670.0,
      "reward": 0.676339328289032,
      "reward_std": 0.138991117477417,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335985660553,
      "step": 935
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2998.0,
      "completions/mean_length": 901.6607666015625,
      "completions/mean_terminated_length": 553.7623901367188,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 8.746355685131196,
      "grad_norm": 0.17645412683486938,
      "learning_rate": 1e-06,
      "loss": -0.0575,
      "num_tokens": 554546222.0,
      "reward": 0.660714328289032,
      "reward_std": 0.1832929402589798,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 936
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3279.0,
      "completions/mean_length": 902.8873291015625,
      "completions/mean_terminated_length": 585.5349731445312,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 8.755685131195335,
      "grad_norm": 0.15916696190834045,
      "learning_rate": 1e-06,
      "loss": -0.0621,
      "num_tokens": 555116649.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.14684300124645233,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 937
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3678.0,
      "completions/mean_length": 1073.328125,
      "completions/mean_terminated_length": 623.8026123046875,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 8.765014577259475,
      "grad_norm": 0.16375704109668732,
      "learning_rate": 1e-06,
      "loss": -0.0714,
      "num_tokens": 555689887.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.17212960124015808,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 938
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3306.0,
      "completions/mean_length": 778.216552734375,
      "completions/mean_terminated_length": 523.00244140625,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 8.774344023323614,
      "grad_norm": 0.14863629639148712,
      "learning_rate": 1e-06,
      "loss": -0.046,
      "num_tokens": 556209001.0,
      "reward": 0.7366071939468384,
      "reward_std": 0.12143944203853607,
      "rewards/verify_math_reward/mean": 0.7366071343421936,
      "rewards/verify_math_reward/std": 0.44071969389915466,
      "step": 939
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3990.0,
      "completions/mean_length": 1018.8873291015625,
      "completions/mean_terminated_length": 623.5906372070312,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 8.783673469387756,
      "grad_norm": 0.13999132812023163,
      "learning_rate": 1e-06,
      "loss": -0.0294,
      "num_tokens": 556797812.0,
      "reward": 0.652901828289032,
      "reward_std": 0.1420711725950241,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631320357322693,
      "step": 940
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2837.0,
      "completions/mean_length": 868.8917846679688,
      "completions/mean_terminated_length": 552.5086059570312,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 8.793002915451895,
      "grad_norm": 0.13161882758140564,
      "learning_rate": 1e-06,
      "loss": -0.0465,
      "num_tokens": 557327723.0,
      "reward": 0.7131696939468384,
      "reward_std": 0.12677791714668274,
      "rewards/verify_math_reward/mean": 0.7131696343421936,
      "rewards/verify_math_reward/std": 0.4525342881679535,
      "step": 941
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3773.0,
      "completions/mean_length": 1007.5547485351562,
      "completions/mean_terminated_length": 570.8446044921875,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 8.802332361516035,
      "grad_norm": 0.13785584270954132,
      "learning_rate": 1e-06,
      "loss": -0.0311,
      "num_tokens": 557860932.0,
      "reward": 0.65625,
      "reward_std": 0.12317357957363129,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 942
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2927.0,
      "completions/mean_length": 923.950927734375,
      "completions/mean_terminated_length": 578.4801635742188,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 8.811661807580174,
      "grad_norm": 0.14556001126766205,
      "learning_rate": 1e-06,
      "loss": -0.0637,
      "num_tokens": 558418480.0,
      "reward": 0.6930803656578064,
      "reward_std": 0.1663813591003418,
      "rewards/verify_math_reward/mean": 0.6930803656578064,
      "rewards/verify_math_reward/std": 0.46147334575653076,
      "step": 943
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4031.0,
      "completions/mean_length": 965.3951416015625,
      "completions/mean_terminated_length": 637.2799072265625,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 8.820991253644316,
      "grad_norm": 0.13868778944015503,
      "learning_rate": 1e-06,
      "loss": -0.0572,
      "num_tokens": 559020906.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.12438002973794937,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 944
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4022.0,
      "completions/mean_length": 1029.735595703125,
      "completions/mean_terminated_length": 613.9049682617188,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 8.830320699708455,
      "grad_norm": 0.15609444677829742,
      "learning_rate": 1e-06,
      "loss": -0.0811,
      "num_tokens": 559589149.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.16314797103405,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 945
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3523.0,
      "completions/mean_length": 939.0480346679688,
      "completions/mean_terminated_length": 569.0311889648438,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 8.839650145772595,
      "grad_norm": 0.15562525391578674,
      "learning_rate": 1e-06,
      "loss": -0.0571,
      "num_tokens": 560129256.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.14846017956733704,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 946
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2939.0,
      "completions/mean_length": 971.0000610351562,
      "completions/mean_terminated_length": 587.2280883789062,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 8.848979591836734,
      "grad_norm": 0.1636345386505127,
      "learning_rate": 1e-06,
      "loss": -0.0768,
      "num_tokens": 560680384.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.1599937528371811,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 947
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3564.0,
      "completions/mean_length": 981.5234985351562,
      "completions/mean_terminated_length": 663.5633544921875,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 8.858309037900874,
      "grad_norm": 0.11569786816835403,
      "learning_rate": 1e-06,
      "loss": -0.0453,
      "num_tokens": 561309981.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.13121412694454193,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179922461509705,
      "step": 948
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3260.0,
      "completions/mean_length": 998.7254638671875,
      "completions/mean_terminated_length": 587.5828247070312,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 8.867638483965015,
      "grad_norm": 0.14304448664188385,
      "learning_rate": 1e-06,
      "loss": -0.0717,
      "num_tokens": 561861527.0,
      "reward": 0.676339328289032,
      "reward_std": 0.14444763958454132,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335687637329,
      "step": 949
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1439732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2845.0,
      "completions/mean_length": 1089.8560791015625,
      "completions/mean_terminated_length": 584.2594604492188,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 8.876967930029155,
      "grad_norm": 0.1965818852186203,
      "learning_rate": 1e-06,
      "loss": -0.0607,
      "num_tokens": 562399710.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.14687760174274445,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 950
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3841.0,
      "completions/mean_length": 1047.482177734375,
      "completions/mean_terminated_length": 585.1105346679688,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 8.886297376093294,
      "grad_norm": 0.15234307944774628,
      "learning_rate": 1e-06,
      "loss": -0.0609,
      "num_tokens": 562948966.0,
      "reward": 0.6328125,
      "reward_std": 0.13083434104919434,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 951
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3128.0,
      "completions/mean_length": 988.5558471679688,
      "completions/mean_terminated_length": 602.5620727539062,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 8.895626822157434,
      "grad_norm": 0.13719192147254944,
      "learning_rate": 1e-06,
      "loss": -0.0582,
      "num_tokens": 563519256.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.1342613846063614,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975656390190125,
      "step": 952
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2888.0,
      "completions/mean_length": 960.536865234375,
      "completions/mean_terminated_length": 579.8861083984375,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 8.904956268221575,
      "grad_norm": 0.20323546230793,
      "learning_rate": 1e-06,
      "loss": -0.0396,
      "num_tokens": 564073761.0,
      "reward": 0.645089328289032,
      "reward_std": 0.19314108788967133,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 953
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2772.0,
      "completions/mean_length": 900.1730346679688,
      "completions/mean_terminated_length": 582.5509033203125,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 8.914285714285715,
      "grad_norm": 0.17294132709503174,
      "learning_rate": 1e-06,
      "loss": -0.0444,
      "num_tokens": 564639772.0,
      "reward": 0.6975446939468384,
      "reward_std": 0.17066673934459686,
      "rewards/verify_math_reward/mean": 0.6975446343421936,
      "rewards/verify_math_reward/std": 0.45957788825035095,
      "step": 954
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4063.0,
      "completions/mean_length": 971.8906860351562,
      "completions/mean_terminated_length": 614.4054565429688,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 8.923615160349854,
      "grad_norm": 0.14424920082092285,
      "learning_rate": 1e-06,
      "loss": -0.0544,
      "num_tokens": 565217474.0,
      "reward": 0.7109375596046448,
      "reward_std": 0.13940481841564178,
      "rewards/verify_math_reward/mean": 0.7109375,
      "rewards/verify_math_reward/std": 0.45358020067214966,
      "step": 955
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4051.0,
      "completions/mean_length": 1035.2757568359375,
      "completions/mean_terminated_length": 606.9300537109375,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 8.932944606413994,
      "grad_norm": 0.1515893191099167,
      "learning_rate": 1e-06,
      "loss": -0.0562,
      "num_tokens": 565784753.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.14887316524982452,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 956
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.15625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3463.0,
      "completions/mean_length": 1158.1328125,
      "completions/mean_terminated_length": 614.0833129882812,
      "completions/min_length": 185.0,
      "completions/min_terminated_length": 185.0,
      "epoch": 8.942274052478133,
      "grad_norm": 0.16238532960414886,
      "learning_rate": 1e-06,
      "loss": -0.0726,
      "num_tokens": 566340680.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.13380561769008636,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 957
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3308.0,
      "completions/mean_length": 1034.575927734375,
      "completions/mean_terminated_length": 632.5706787109375,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 8.951603498542275,
      "grad_norm": 0.16529758274555206,
      "learning_rate": 1e-06,
      "loss": -0.0535,
      "num_tokens": 566936924.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.1720893532037735,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 958
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3321.0,
      "completions/mean_length": 1027.1082763671875,
      "completions/mean_terminated_length": 588.6951293945312,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 8.960932944606414,
      "grad_norm": 0.14592179656028748,
      "learning_rate": 1e-06,
      "loss": -0.0565,
      "num_tokens": 567489669.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.13425210118293762,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.4600565731525421,
      "step": 959
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3533.0,
      "completions/mean_length": 1098.482177734375,
      "completions/mean_terminated_length": 670.2653198242188,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 8.970262390670554,
      "grad_norm": 0.14832952618598938,
      "learning_rate": 1e-06,
      "loss": -0.0602,
      "num_tokens": 568111877.0,
      "reward": 0.676339328289032,
      "reward_std": 0.16183707118034363,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335985660553,
      "step": 960
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2840.0,
      "completions/mean_length": 917.3873291015625,
      "completions/mean_terminated_length": 601.47607421875,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 8.979591836734693,
      "grad_norm": 0.14388814568519592,
      "learning_rate": 1e-06,
      "loss": -0.0588,
      "num_tokens": 568690640.0,
      "reward": 0.707589328289032,
      "reward_std": 0.1379055380821228,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 961
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2771.0,
      "completions/mean_length": 1085.079345703125,
      "completions/mean_terminated_length": 623.947265625,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 8.988921282798835,
      "grad_norm": 0.15324236452579498,
      "learning_rate": 1e-06,
      "loss": -0.0331,
      "num_tokens": 569272719.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.15709525346755981,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 962
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.10795454545454541,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3484.0,
      "completions/mean_length": 1034.82958984375,
      "completions/mean_terminated_length": 664.3694458007812,
      "completions/min_length": 179.0,
      "completions/min_terminated_length": 179.0,
      "epoch": 8.998250728862974,
      "grad_norm": 0.14960594475269318,
      "learning_rate": 1e-06,
      "loss": -0.0857,
      "num_tokens": 569898578.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.15454721450805664,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 963
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3632.0,
      "completions/mean_length": 1084.6842041015625,
      "completions/mean_terminated_length": 697.8400268554688,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 9.00932944606414,
      "grad_norm": 0.13798940181732178,
      "learning_rate": 1e-06,
      "loss": -0.0505,
      "num_tokens": 570544871.0,
      "reward": 0.6640625,
      "reward_std": 0.1573990434408188,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 964
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1272321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3912.0,
      "completions/mean_length": 1014.8527221679688,
      "completions/mean_terminated_length": 565.682861328125,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 9.018658892128279,
      "grad_norm": 0.1554582715034485,
      "learning_rate": 1e-06,
      "loss": -0.0397,
      "num_tokens": 571073219.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.11239181458950043,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.4628615975379944,
      "step": 965
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3055.0,
      "completions/mean_length": 910.2723388671875,
      "completions/mean_terminated_length": 593.6539916992188,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 9.02798833819242,
      "grad_norm": 0.16948619484901428,
      "learning_rate": 1e-06,
      "loss": -0.0649,
      "num_tokens": 571653719.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.15529540181159973,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 966
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3826.0,
      "completions/mean_length": 1083.469970703125,
      "completions/mean_terminated_length": 617.6146850585938,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 9.03731778425656,
      "grad_norm": 0.15437005460262299,
      "learning_rate": 1e-06,
      "loss": -0.058,
      "num_tokens": 572223388.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.15785479545593262,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.47942501306533813,
      "step": 967
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3944.0,
      "completions/mean_length": 1079.0848388671875,
      "completions/mean_terminated_length": 617.0347290039062,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 9.0466472303207,
      "grad_norm": 0.16558527946472168,
      "learning_rate": 1e-06,
      "loss": -0.0653,
      "num_tokens": 572808672.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.16784563660621643,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 968
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3676.0,
      "completions/mean_length": 991.9810791015625,
      "completions/mean_terminated_length": 619.4987182617188,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 9.055976676384839,
      "grad_norm": 0.1664285659790039,
      "learning_rate": 1e-06,
      "loss": -0.0499,
      "num_tokens": 573388911.0,
      "reward": 0.707589328289032,
      "reward_std": 0.16308125853538513,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 969
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2918.0,
      "completions/mean_length": 912.30810546875,
      "completions/mean_terminated_length": 556.8088989257812,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 9.06530612244898,
      "grad_norm": 0.1441843956708908,
      "learning_rate": 1e-06,
      "loss": -0.0377,
      "num_tokens": 573922227.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.13760104775428772,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613667368888855,
      "step": 970
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3922.0,
      "completions/mean_length": 1120.3148193359375,
      "completions/mean_terminated_length": 628.8816528320312,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 9.07463556851312,
      "grad_norm": 0.1594502031803131,
      "learning_rate": 1e-06,
      "loss": -0.0444,
      "num_tokens": 574498245.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.1505335569381714,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 971
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3337.0,
      "completions/mean_length": 918.4766235351562,
      "completions/mean_terminated_length": 598.382080078125,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 9.08396501457726,
      "grad_norm": 0.13108834624290466,
      "learning_rate": 1e-06,
      "loss": -0.0321,
      "num_tokens": 575068248.0,
      "reward": 0.7131696939468384,
      "reward_std": 0.1353137493133545,
      "rewards/verify_math_reward/mean": 0.7131696343421936,
      "rewards/verify_math_reward/std": 0.4525342881679535,
      "step": 972
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2631.0,
      "completions/mean_length": 938.8582763671875,
      "completions/mean_terminated_length": 616.5424194335938,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 9.093294460641399,
      "grad_norm": 0.1505901962518692,
      "learning_rate": 1e-06,
      "loss": -0.0466,
      "num_tokens": 575657521.0,
      "reward": 0.6930803656578064,
      "reward_std": 0.15691189467906952,
      "rewards/verify_math_reward/mean": 0.6930803656578064,
      "rewards/verify_math_reward/std": 0.46147334575653076,
      "step": 973
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2541.0,
      "completions/mean_length": 1112.43310546875,
      "completions/mean_terminated_length": 664.323486328125,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 9.102623906705539,
      "grad_norm": 0.14574332535266876,
      "learning_rate": 1e-06,
      "loss": -0.0251,
      "num_tokens": 576255141.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.1285124570131302,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 974
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3185.0,
      "completions/mean_length": 1083.8695068359375,
      "completions/mean_terminated_length": 622.5521240234375,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 9.11195335276968,
      "grad_norm": 0.1633671373128891,
      "learning_rate": 1e-06,
      "loss": -0.0821,
      "num_tokens": 576844376.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.15785479545593262,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644899368286,
      "step": 975
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1495535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3928.0,
      "completions/mean_length": 1111.6507568359375,
      "completions/mean_terminated_length": 586.8438110351562,
      "completions/min_length": 194.0,
      "completions/min_terminated_length": 194.0,
      "epoch": 9.12128279883382,
      "grad_norm": 0.14949971437454224,
      "learning_rate": 1e-06,
      "loss": -0.0462,
      "num_tokens": 577383727.0,
      "reward": 0.6171875,
      "reward_std": 0.14458921551704407,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 976
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3850.0,
      "completions/mean_length": 1097.341552734375,
      "completions/mean_terminated_length": 707.8562622070312,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 9.130612244897959,
      "grad_norm": 0.14869371056556702,
      "learning_rate": 1e-06,
      "loss": -0.0469,
      "num_tokens": 578038009.0,
      "reward": 0.625,
      "reward_std": 0.17559193074703217,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 977
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3343.0,
      "completions/mean_length": 1100.8192138671875,
      "completions/mean_terminated_length": 610.6986694335938,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 9.139941690962099,
      "grad_norm": 0.14077144861221313,
      "learning_rate": 1e-06,
      "loss": -0.0513,
      "num_tokens": 578597015.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.12400025129318237,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 978
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2632.0,
      "completions/mean_length": 1064.946533203125,
      "completions/mean_terminated_length": 618.6325073242188,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 9.14927113702624,
      "grad_norm": 0.15716727077960968,
      "learning_rate": 1e-06,
      "loss": -0.0558,
      "num_tokens": 579174959.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.14042328298091888,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 979
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3250.0,
      "completions/mean_length": 1080.828125,
      "completions/mean_terminated_length": 645.6883544921875,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 9.15860058309038,
      "grad_norm": 0.17431092262268066,
      "learning_rate": 1e-06,
      "loss": -0.0813,
      "num_tokens": 579771733.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.19831563532352448,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 980
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3907.0,
      "completions/mean_length": 1025.091552734375,
      "completions/mean_terminated_length": 617.4487915039062,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 9.167930029154519,
      "grad_norm": 0.15481217205524445,
      "learning_rate": 1e-06,
      "loss": -0.072,
      "num_tokens": 580357559.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.1678135246038437,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 981
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2707.0,
      "completions/mean_length": 874.5625610351562,
      "completions/mean_terminated_length": 601.559326171875,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 9.177259475218658,
      "grad_norm": 0.14269818365573883,
      "learning_rate": 1e-06,
      "loss": -0.0474,
      "num_tokens": 580933903.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.1479741632938385,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 982
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0669642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3637.0,
      "completions/mean_length": 839.9397583007812,
      "completions/mean_terminated_length": 606.2511596679688,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 9.186588921282798,
      "grad_norm": 0.14914286136627197,
      "learning_rate": 1e-06,
      "loss": -0.0626,
      "num_tokens": 581526081.0,
      "reward": 0.7522321939468384,
      "reward_std": 0.16788700222969055,
      "rewards/verify_math_reward/mean": 0.7522321343421936,
      "rewards/verify_math_reward/std": 0.4319573938846588,
      "step": 983
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3861.0,
      "completions/mean_length": 1065.5692138671875,
      "completions/mean_terminated_length": 637.0624389648438,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 9.19591836734694,
      "grad_norm": 0.14959770441055298,
      "learning_rate": 1e-06,
      "loss": -0.0521,
      "num_tokens": 582115927.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.16266827285289764,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 984
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.046875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3236.0,
      "completions/mean_length": 747.8939819335938,
      "completions/mean_terminated_length": 583.2329711914062,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 9.205247813411079,
      "grad_norm": 0.13443413376808167,
      "learning_rate": 1e-06,
      "loss": -0.0214,
      "num_tokens": 582685680.0,
      "reward": 0.7444196939468384,
      "reward_std": 0.1287727802991867,
      "rewards/verify_math_reward/mean": 0.7444196343421936,
      "rewards/verify_math_reward/std": 0.43643051385879517,
      "step": 985
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0703125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2657.0,
      "completions/mean_length": 821.0814819335938,
      "completions/mean_terminated_length": 573.3985595703125,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 9.214577259475218,
      "grad_norm": 0.13425439596176147,
      "learning_rate": 1e-06,
      "loss": -0.0545,
      "num_tokens": 583248905.0,
      "reward": 0.754464328289032,
      "reward_std": 0.11914923042058945,
      "rewards/verify_math_reward/mean": 0.7544642686843872,
      "rewards/verify_math_reward/std": 0.43064478039741516,
      "step": 986
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1584821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3953.0,
      "completions/mean_length": 1207.0413818359375,
      "completions/mean_terminated_length": 662.9668579101562,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 9.223906705539358,
      "grad_norm": 0.15486279129981995,
      "learning_rate": 1e-06,
      "loss": -0.0709,
      "num_tokens": 583848238.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.1367775946855545,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 987
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3059.0,
      "completions/mean_length": 1071.575927734375,
      "completions/mean_terminated_length": 683.0478515625,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 9.2332361516035,
      "grad_norm": 0.14331471920013428,
      "learning_rate": 1e-06,
      "loss": -0.062,
      "num_tokens": 584490458.0,
      "reward": 0.640625,
      "reward_std": 0.16308125853538513,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 988
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3276.0,
      "completions/mean_length": 939.4933471679688,
      "completions/mean_terminated_length": 578.3009643554688,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 9.242565597667639,
      "grad_norm": 0.17624999582767487,
      "learning_rate": 1e-06,
      "loss": -0.052,
      "num_tokens": 585039044.0,
      "reward": 0.707589328289032,
      "reward_std": 0.14800554513931274,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 989
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4046.0,
      "completions/mean_length": 986.411865234375,
      "completions/mean_terminated_length": 600.1517944335938,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 9.251895043731778,
      "grad_norm": 0.16277343034744263,
      "learning_rate": 1e-06,
      "loss": -0.0733,
      "num_tokens": 585614749.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.1639414280653,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 990
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2801.0,
      "completions/mean_length": 942.0614013671875,
      "completions/mean_terminated_length": 594.229248046875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 9.261224489795918,
      "grad_norm": 0.1278885006904602,
      "learning_rate": 1e-06,
      "loss": -0.02,
      "num_tokens": 586196420.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.1173504963517189,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 991
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3554.0,
      "completions/mean_length": 1033.33935546875,
      "completions/mean_terminated_length": 652.9083862304688,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 9.270553935860057,
      "grad_norm": 0.16274511814117432,
      "learning_rate": 1e-06,
      "loss": -0.0916,
      "num_tokens": 586803124.0,
      "reward": 0.637276828289032,
      "reward_std": 0.1754392832517624,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 992
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3278.0,
      "completions/mean_length": 1030.6138916015625,
      "completions/mean_terminated_length": 632.4615478515625,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 9.279883381924199,
      "grad_norm": 0.13017813861370087,
      "learning_rate": 1e-06,
      "loss": -0.0704,
      "num_tokens": 587403874.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.1233583390712738,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 993
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3528.0,
      "completions/mean_length": 1033.0770263671875,
      "completions/mean_terminated_length": 643.950927734375,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 9.289212827988338,
      "grad_norm": 0.1934451460838318,
      "learning_rate": 1e-06,
      "loss": -0.0575,
      "num_tokens": 587998991.0,
      "reward": 0.707589328289032,
      "reward_std": 0.1688666045665741,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 994
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3972.0,
      "completions/mean_length": 945.72998046875,
      "completions/mean_terminated_length": 606.9493408203125,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 9.298542274052478,
      "grad_norm": 0.14389818906784058,
      "learning_rate": 1e-06,
      "loss": -0.0465,
      "num_tokens": 588582557.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.13639964163303375,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 995
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4025.0,
      "completions/mean_length": 906.7779541015625,
      "completions/mean_terminated_length": 611.1914672851562,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 9.307871720116617,
      "grad_norm": 0.13750776648521423,
      "learning_rate": 1e-06,
      "loss": -0.0147,
      "num_tokens": 589176174.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.11355367302894592,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331799030303955,
      "step": 996
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3042.0,
      "completions/mean_length": 905.4319458007812,
      "completions/mean_terminated_length": 613.9671630859375,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 9.317201166180759,
      "grad_norm": 0.16331742703914642,
      "learning_rate": 1e-06,
      "loss": -0.0443,
      "num_tokens": 589767569.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.18325723707675934,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 997
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3535.0,
      "completions/mean_length": 891.9029541015625,
      "completions/mean_terminated_length": 607.6998901367188,
      "completions/min_length": 98.0,
      "completions/min_terminated_length": 98.0,
      "epoch": 9.326530612244898,
      "grad_norm": 0.1405782550573349,
      "learning_rate": 1e-06,
      "loss": -0.0219,
      "num_tokens": 590345418.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.14417481422424316,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422141790390015,
      "step": 998
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2858.0,
      "completions/mean_length": 855.7332763671875,
      "completions/mean_terminated_length": 533.6944580078125,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 9.335860058309038,
      "grad_norm": 0.14971332252025604,
      "learning_rate": 1e-06,
      "loss": -0.044,
      "num_tokens": 590871755.0,
      "reward": 0.7042410969734192,
      "reward_std": 0.14771313965320587,
      "rewards/verify_math_reward/mean": 0.7042410969734192,
      "rewards/verify_math_reward/std": 0.45663803815841675,
      "step": 999
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3361.0,
      "completions/mean_length": 931.8750610351562,
      "completions/mean_terminated_length": 617.4036865234375,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 9.345189504373177,
      "grad_norm": 0.1444764882326126,
      "learning_rate": 1e-06,
      "loss": -0.0138,
      "num_tokens": 591469659.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.11971446871757507,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1000
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1529017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3928.0,
      "completions/mean_length": 1138.9129638671875,
      "completions/mean_terminated_length": 605.1567993164062,
      "completions/min_length": 180.0,
      "completions/min_terminated_length": 180.0,
      "epoch": 9.354518950437317,
      "grad_norm": 0.16810676455497742,
      "learning_rate": 1e-06,
      "loss": -0.0831,
      "num_tokens": 592022821.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.1713821291923523,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.46896928548812866,
      "step": 1001
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4012.0,
      "completions/mean_length": 1000.6585083007812,
      "completions/mean_terminated_length": 571.95166015625,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 9.363848396501458,
      "grad_norm": 0.1481969803571701,
      "learning_rate": 1e-06,
      "loss": -0.0379,
      "num_tokens": 592555651.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.1141170859336853,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 1002
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1272321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2707.0,
      "completions/mean_length": 1031.969970703125,
      "completions/mean_terminated_length": 585.29541015625,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 9.373177842565598,
      "grad_norm": 0.15297362208366394,
      "learning_rate": 1e-06,
      "loss": -0.0702,
      "num_tokens": 593101040.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.14207187294960022,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 1003
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2803.0,
      "completions/mean_length": 1000.8984985351562,
      "completions/mean_terminated_length": 625.147705078125,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 9.382507288629737,
      "grad_norm": 0.14825621247291565,
      "learning_rate": 1e-06,
      "loss": -0.0752,
      "num_tokens": 593691989.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.1544409692287445,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 1004
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1473214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3943.0,
      "completions/mean_length": 1137.28466796875,
      "completions/mean_terminated_length": 626.0929565429688,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 9.391836734693877,
      "grad_norm": 0.13579760491847992,
      "learning_rate": 1e-06,
      "loss": -0.0352,
      "num_tokens": 594255356.0,
      "reward": 0.6640625,
      "reward_std": 0.10043821483850479,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 1005
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3277.0,
      "completions/mean_length": 1002.0279541015625,
      "completions/mean_terminated_length": 639.3927612304688,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 9.401166180758018,
      "grad_norm": 0.14921943843364716,
      "learning_rate": 1e-06,
      "loss": -0.0322,
      "num_tokens": 594856757.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.13083365559577942,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1006
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1484375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3523.0,
      "completions/mean_length": 1081.48779296875,
      "completions/mean_terminated_length": 556.0222778320312,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 9.410495626822158,
      "grad_norm": 0.16988012194633484,
      "learning_rate": 1e-06,
      "loss": -0.0603,
      "num_tokens": 595369450.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.1468455195426941,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1007
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3736.0,
      "completions/mean_length": 1034.6898193359375,
      "completions/mean_terminated_length": 632.6995239257812,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 9.419825072886297,
      "grad_norm": 0.15362633764743805,
      "learning_rate": 1e-06,
      "loss": -0.0353,
      "num_tokens": 595959132.0,
      "reward": 0.629464328289032,
      "reward_std": 0.13554087281227112,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 1008
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3153.0,
      "completions/mean_length": 1046.751220703125,
      "completions/mean_terminated_length": 646.3447265625,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 9.429154518950437,
      "grad_norm": 0.13277749717235565,
      "learning_rate": 1e-06,
      "loss": -0.0552,
      "num_tokens": 596558869.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.12993352115154266,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1009
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2678.0,
      "completions/mean_length": 966.3984985351562,
      "completions/mean_terminated_length": 582.0614013671875,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 9.438483965014576,
      "grad_norm": 0.17353737354278564,
      "learning_rate": 1e-06,
      "loss": -0.0524,
      "num_tokens": 597120634.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.1468009203672409,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 1010
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3267.0,
      "completions/mean_length": 965.2433471679688,
      "completions/mean_terminated_length": 593.9300537109375,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 9.447813411078718,
      "grad_norm": 0.1273731142282486,
      "learning_rate": 1e-06,
      "loss": -0.0399,
      "num_tokens": 597686860.0,
      "reward": 0.7299107313156128,
      "reward_std": 0.1226850375533104,
      "rewards/verify_math_reward/mean": 0.7299107313156128,
      "rewards/verify_math_reward/std": 0.44425368309020996,
      "step": 1011
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3336.0,
      "completions/mean_length": 973.4989013671875,
      "completions/mean_terminated_length": 554.5303955078125,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 9.457142857142857,
      "grad_norm": 0.14967063069343567,
      "learning_rate": 1e-06,
      "loss": -0.0464,
      "num_tokens": 598214675.0,
      "reward": 0.7008928656578064,
      "reward_std": 0.12099436670541763,
      "rewards/verify_math_reward/mean": 0.7008928656578064,
      "rewards/verify_math_reward/std": 0.4581226110458374,
      "step": 1012
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2466.0,
      "completions/mean_length": 975.9777221679688,
      "completions/mean_terminated_length": 614.630126953125,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 9.466472303206997,
      "grad_norm": 0.15361587703227997,
      "learning_rate": 1e-06,
      "loss": -0.0746,
      "num_tokens": 598815063.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.17040501534938812,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.47942501306533813,
      "step": 1013
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3505.0,
      "completions/mean_length": 1141.0648193359375,
      "completions/mean_terminated_length": 688.507080078125,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 9.475801749271136,
      "grad_norm": 0.1506103128194809,
      "learning_rate": 1e-06,
      "loss": -0.0689,
      "num_tokens": 599433513.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.16401740908622742,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.47942501306533813,
      "step": 1014
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3918.0,
      "completions/mean_length": 1147.2445068359375,
      "completions/mean_terminated_length": 713.0486450195312,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 9.485131195335278,
      "grad_norm": 0.16414706408977509,
      "learning_rate": 1e-06,
      "loss": -0.0593,
      "num_tokens": 600080492.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.19309872388839722,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 1015
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3410.0,
      "completions/mean_length": 872.0859985351562,
      "completions/mean_terminated_length": 564.6712036132812,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 9.494460641399417,
      "grad_norm": 0.14051392674446106,
      "learning_rate": 1e-06,
      "loss": -0.0586,
      "num_tokens": 600622465.0,
      "reward": 0.6986607313156128,
      "reward_std": 0.1277196854352951,
      "rewards/verify_math_reward/mean": 0.6986607313156128,
      "rewards/verify_math_reward/std": 0.4590960443019867,
      "step": 1016
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2636.0,
      "completions/mean_length": 1015.87060546875,
      "completions/mean_terminated_length": 584.8091430664062,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 9.503790087463557,
      "grad_norm": 0.16711576282978058,
      "learning_rate": 1e-06,
      "loss": -0.0544,
      "num_tokens": 601164437.0,
      "reward": 0.7098214626312256,
      "reward_std": 0.1406829059123993,
      "rewards/verify_math_reward/mean": 0.7098214030265808,
      "rewards/verify_math_reward/std": 0.454098105430603,
      "step": 1017
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3433.0,
      "completions/mean_length": 979.5156860351562,
      "completions/mean_terminated_length": 601.1689453125,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 9.513119533527696,
      "grad_norm": 0.16849058866500854,
      "learning_rate": 1e-06,
      "loss": -0.0812,
      "num_tokens": 601738163.0,
      "reward": 0.7020089626312256,
      "reward_std": 0.19320710003376007,
      "rewards/verify_math_reward/mean": 0.7020089030265808,
      "rewards/verify_math_reward/std": 0.45763099193573,
      "step": 1018
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3570.0,
      "completions/mean_length": 936.489990234375,
      "completions/mean_terminated_length": 626.7340698242188,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 9.522448979591836,
      "grad_norm": 0.1501246690750122,
      "learning_rate": 1e-06,
      "loss": -0.0814,
      "num_tokens": 602330778.0,
      "reward": 0.7488839626312256,
      "reward_std": 0.1563873291015625,
      "rewards/verify_math_reward/mean": 0.7488839030265808,
      "rewards/verify_math_reward/std": 0.43389734625816345,
      "step": 1019
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1506696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3444.0,
      "completions/mean_length": 1142.0859375,
      "completions/mean_terminated_length": 618.0670166015625,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 9.531778425655977,
      "grad_norm": 0.1566106081008911,
      "learning_rate": 1e-06,
      "loss": -0.0677,
      "num_tokens": 602885975.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.14733155071735382,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644601345062,
      "step": 1020
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3076.0,
      "completions/mean_length": 1067.13623046875,
      "completions/mean_terminated_length": 566.920654296875,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 9.541107871720117,
      "grad_norm": 0.17518474161624908,
      "learning_rate": 1e-06,
      "loss": -0.071,
      "num_tokens": 603409729.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.15800705552101135,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1021
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3153.0,
      "completions/mean_length": 1032.0357666015625,
      "completions/mean_terminated_length": 620.9215087890625,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 9.550437317784256,
      "grad_norm": 0.14200487732887268,
      "learning_rate": 1e-06,
      "loss": -0.0492,
      "num_tokens": 603988409.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.12118053436279297,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 1022
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1517857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3979.0,
      "completions/mean_length": 1218.75341796875,
      "completions/mean_terminated_length": 703.877685546875,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 9.559766763848396,
      "grad_norm": 0.14762155711650848,
      "learning_rate": 1e-06,
      "loss": -0.0586,
      "num_tokens": 604624500.0,
      "reward": 0.574776828289032,
      "reward_std": 0.13729864358901978,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 1023
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3154.0,
      "completions/mean_length": 898.07373046875,
      "completions/mean_terminated_length": 614.4180297851562,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 9.569096209912537,
      "grad_norm": 0.1496865153312683,
      "learning_rate": 1e-06,
      "loss": -0.03,
      "num_tokens": 605229534.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.14770880341529846,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.4610042870044708,
      "step": 1024
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3939.0,
      "completions/mean_length": 1017.4888916015625,
      "completions/mean_terminated_length": 630.7412109375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 9.578425655976677,
      "grad_norm": 0.1566886007785797,
      "learning_rate": 1e-06,
      "loss": -0.0589,
      "num_tokens": 605814708.0,
      "reward": 0.691964328289032,
      "reward_std": 0.15567448735237122,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 1025
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4006.0,
      "completions/mean_length": 987.8616333007812,
      "completions/mean_terminated_length": 539.3052368164062,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 9.587755102040816,
      "grad_norm": 0.13987243175506592,
      "learning_rate": 1e-06,
      "loss": -0.0501,
      "num_tokens": 606334264.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.094690702855587,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1026
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3537.0,
      "completions/mean_length": 943.87841796875,
      "completions/mean_terminated_length": 596.24658203125,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 9.597084548104956,
      "grad_norm": 0.1412520706653595,
      "learning_rate": 1e-06,
      "loss": -0.0361,
      "num_tokens": 606901419.0,
      "reward": 0.7142857313156128,
      "reward_std": 0.10074201226234436,
      "rewards/verify_math_reward/mean": 0.7142857313156128,
      "rewards/verify_math_reward/std": 0.4520062506198883,
      "step": 1027
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3698.0,
      "completions/mean_length": 1085.5546875,
      "completions/mean_terminated_length": 655.4910278320312,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 9.606413994169095,
      "grad_norm": 0.15377016365528107,
      "learning_rate": 1e-06,
      "loss": -0.0562,
      "num_tokens": 607509756.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.14744214713573456,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 1028
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3705.0,
      "completions/mean_length": 944.6574096679688,
      "completions/mean_terminated_length": 553.2107543945312,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 9.615743440233237,
      "grad_norm": 0.15246158838272095,
      "learning_rate": 1e-06,
      "loss": -0.0621,
      "num_tokens": 608045433.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.1385371834039688,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 1029
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2611.0,
      "completions/mean_length": 840.896240234375,
      "completions/mean_terminated_length": 547.857666015625,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 9.625072886297376,
      "grad_norm": 0.1378517597913742,
      "learning_rate": 1e-06,
      "loss": -0.0574,
      "num_tokens": 608581124.0,
      "reward": 0.7198660969734192,
      "reward_std": 0.12297996133565903,
      "rewards/verify_math_reward/mean": 0.7198660969734192,
      "rewards/verify_math_reward/std": 0.44931527972221375,
      "step": 1030
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3184.0,
      "completions/mean_length": 1007.8839721679688,
      "completions/mean_terminated_length": 619.9296264648438,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 9.634402332361516,
      "grad_norm": 0.17682576179504395,
      "learning_rate": 1e-06,
      "loss": -0.0557,
      "num_tokens": 609164252.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.1277197003364563,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 1031
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3310.0,
      "completions/mean_length": 1014.5391235351562,
      "completions/mean_terminated_length": 623.057861328125,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 9.643731778425655,
      "grad_norm": 0.15746831893920898,
      "learning_rate": 1e-06,
      "loss": -0.0636,
      "num_tokens": 609753823.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.19419346749782562,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 1032
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2535.0,
      "completions/mean_length": 965.8917846679688,
      "completions/mean_terminated_length": 581.4924926757812,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 9.653061224489797,
      "grad_norm": 0.15120132267475128,
      "learning_rate": 1e-06,
      "loss": -0.0273,
      "num_tokens": 610301798.0,
      "reward": 0.7120535969734192,
      "reward_std": 0.1345216929912567,
      "rewards/verify_math_reward/mean": 0.7120535969734192,
      "rewards/verify_math_reward/std": 0.4530589282512665,
      "step": 1033
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3883.0,
      "completions/mean_length": 1081.328125,
      "completions/mean_terminated_length": 597.106201171875,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 9.662390670553936,
      "grad_norm": 0.1371956616640091,
      "learning_rate": 1e-06,
      "loss": -0.0355,
      "num_tokens": 610852388.0,
      "reward": 0.6640625,
      "reward_std": 0.12046445161104202,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 1034
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3800.0,
      "completions/mean_length": 890.560302734375,
      "completions/mean_terminated_length": 580.6095581054688,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 9.671720116618076,
      "grad_norm": 0.15502233803272247,
      "learning_rate": 1e-06,
      "loss": -0.0734,
      "num_tokens": 611411858.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.16375456750392914,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422144770622253,
      "step": 1035
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3866.0,
      "completions/mean_length": 1024.638427734375,
      "completions/mean_terminated_length": 612.5316772460938,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 9.681049562682215,
      "grad_norm": 0.1512136310338974,
      "learning_rate": 1e-06,
      "loss": -0.075,
      "num_tokens": 611984750.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.15939712524414062,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.46642565727233887,
      "step": 1036
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3534.0,
      "completions/mean_length": 1063.9241943359375,
      "completions/mean_terminated_length": 595.04638671875,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 9.690379008746355,
      "grad_norm": 0.15705522894859314,
      "learning_rate": 1e-06,
      "loss": -0.0607,
      "num_tokens": 612525778.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.13617070019245148,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 1037
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2797.0,
      "completions/mean_length": 1029.1842041015625,
      "completions/mean_terminated_length": 630.8461303710938,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 9.699708454810496,
      "grad_norm": 0.15667232871055603,
      "learning_rate": 1e-06,
      "loss": -0.0679,
      "num_tokens": 613109703.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.17499276995658875,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 1038
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3893.0,
      "completions/mean_length": 941.700927734375,
      "completions/mean_terminated_length": 593.8289794921875,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 9.709037900874636,
      "grad_norm": 0.16166822612285614,
      "learning_rate": 1e-06,
      "loss": -0.0346,
      "num_tokens": 613678235.0,
      "reward": 0.65625,
      "reward_std": 0.14260390400886536,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 1039
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2820.0,
      "completions/mean_length": 1102.009033203125,
      "completions/mean_terminated_length": 607.5526733398438,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 9.718367346938775,
      "grad_norm": 0.15334701538085938,
      "learning_rate": 1e-06,
      "loss": -0.072,
      "num_tokens": 614230603.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.14000847935676575,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 1040
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3770.0,
      "completions/mean_length": 880.9285888671875,
      "completions/mean_terminated_length": 557.0515747070312,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 9.727696793002915,
      "grad_norm": 0.15773150324821472,
      "learning_rate": 1e-06,
      "loss": -0.0599,
      "num_tokens": 614774603.0,
      "reward": 0.7042410969734192,
      "reward_std": 0.15353691577911377,
      "rewards/verify_math_reward/mean": 0.7042410969734192,
      "rewards/verify_math_reward/std": 0.45663803815841675,
      "step": 1041
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3833.0,
      "completions/mean_length": 860.6138916015625,
      "completions/mean_terminated_length": 586.4285888671875,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 9.737026239067056,
      "grad_norm": 0.15409444272518158,
      "learning_rate": 1e-06,
      "loss": -0.036,
      "num_tokens": 615341377.0,
      "reward": 0.7421875596046448,
      "reward_std": 0.14481674134731293,
      "rewards/verify_math_reward/mean": 0.7421875,
      "rewards/verify_math_reward/std": 0.43767455220222473,
      "step": 1042
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2648.0,
      "completions/mean_length": 980.2656860351562,
      "completions/mean_terminated_length": 575.5737915039062,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 9.746355685131196,
      "grad_norm": 0.15941530466079712,
      "learning_rate": 1e-06,
      "loss": -0.0827,
      "num_tokens": 615884735.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.14414453506469727,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 1043
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2472.0,
      "completions/mean_length": 1015.44091796875,
      "completions/mean_terminated_length": 593.2322387695312,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 9.755685131195335,
      "grad_norm": 0.17075827717781067,
      "learning_rate": 1e-06,
      "loss": -0.0778,
      "num_tokens": 616437930.0,
      "reward": 0.691964328289032,
      "reward_std": 0.14692078530788422,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 1044
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3558.0,
      "completions/mean_length": 929.5592041015625,
      "completions/mean_terminated_length": 571.6136474609375,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 9.765014577259475,
      "grad_norm": 0.12774479389190674,
      "learning_rate": 1e-06,
      "loss": -0.0586,
      "num_tokens": 616973047.0,
      "reward": 0.7198660969734192,
      "reward_std": 0.12253489345312119,
      "rewards/verify_math_reward/mean": 0.7198660969734192,
      "rewards/verify_math_reward/std": 0.44931527972221375,
      "step": 1045
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2893.0,
      "completions/mean_length": 997.10498046875,
      "completions/mean_terminated_length": 693.2916870117188,
      "completions/min_length": 179.0,
      "completions/min_terminated_length": 179.0,
      "epoch": 9.774344023323614,
      "grad_norm": 0.142217755317688,
      "learning_rate": 1e-06,
      "loss": -0.0456,
      "num_tokens": 617633069.0,
      "reward": 0.668526828289032,
      "reward_std": 0.16206417977809906,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056483745575,
      "step": 1046
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1607142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3108.0,
      "completions/mean_length": 1195.575927734375,
      "completions/mean_terminated_length": 640.175537109375,
      "completions/min_length": 179.0,
      "completions/min_terminated_length": 179.0,
      "epoch": 9.783673469387756,
      "grad_norm": 0.14058978855609894,
      "learning_rate": 1e-06,
      "loss": -0.0683,
      "num_tokens": 618211889.0,
      "reward": 0.606026828289032,
      "reward_std": 0.12215623259544373,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 1047
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3573.0,
      "completions/mean_length": 1019.1953735351562,
      "completions/mean_terminated_length": 610.7699584960938,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 9.793002915451895,
      "grad_norm": 0.15759840607643127,
      "learning_rate": 1e-06,
      "loss": -0.0574,
      "num_tokens": 618788432.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.13534724712371826,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 1048
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3467.0,
      "completions/mean_length": 942.2656860351562,
      "completions/mean_terminated_length": 603.1124877929688,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 9.802332361516035,
      "grad_norm": 0.14560723304748535,
      "learning_rate": 1e-06,
      "loss": -0.0244,
      "num_tokens": 619364190.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.13711388409137726,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 1049
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3019.0,
      "completions/mean_length": 901.3504638671875,
      "completions/mean_terminated_length": 596.7261962890625,
      "completions/min_length": 94.0,
      "completions/min_terminated_length": 94.0,
      "epoch": 9.811661807580174,
      "grad_norm": 0.14852198958396912,
      "learning_rate": 1e-06,
      "loss": -0.0725,
      "num_tokens": 619940152.0,
      "reward": 0.7299107313156128,
      "reward_std": 0.157290980219841,
      "rewards/verify_math_reward/mean": 0.7299107313156128,
      "rewards/verify_math_reward/std": 0.44425368309020996,
      "step": 1050
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1484375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2936.0,
      "completions/mean_length": 1114.2545166015625,
      "completions/mean_terminated_length": 594.5006713867188,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 9.820991253644316,
      "grad_norm": 0.1678614467382431,
      "learning_rate": 1e-06,
      "loss": -0.0968,
      "num_tokens": 620474324.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.16078147292137146,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 1051
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1729910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3515.0,
      "completions/mean_length": 1249.8616943359375,
      "completions/mean_terminated_length": 654.5155029296875,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 9.830320699708455,
      "grad_norm": 0.1502968668937683,
      "learning_rate": 1e-06,
      "loss": -0.0108,
      "num_tokens": 621053568.0,
      "reward": 0.6171875,
      "reward_std": 0.12944427132606506,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 1052
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3461.0,
      "completions/mean_length": 1086.83154296875,
      "completions/mean_terminated_length": 594.4220581054688,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 9.839650145772595,
      "grad_norm": 0.15375304222106934,
      "learning_rate": 1e-06,
      "loss": -0.0698,
      "num_tokens": 621594113.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.12621337175369263,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1053
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2540.0,
      "completions/mean_length": 1051.484375,
      "completions/mean_terminated_length": 620.9860229492188,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 9.848979591836734,
      "grad_norm": 0.13623444736003876,
      "learning_rate": 1e-06,
      "loss": -0.042,
      "num_tokens": 622171643.0,
      "reward": 0.606026828289032,
      "reward_std": 0.14166070520877838,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 1054
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2847.0,
      "completions/mean_length": 1043.6707763671875,
      "completions/mean_terminated_length": 585.233642578125,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 9.858309037900874,
      "grad_norm": 0.14690545201301575,
      "learning_rate": 1e-06,
      "loss": -0.0409,
      "num_tokens": 622729692.0,
      "reward": 0.637276828289032,
      "reward_std": 0.12471521645784378,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 1055
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3755.0,
      "completions/mean_length": 1062.4554443359375,
      "completions/mean_terminated_length": 655.4227905273438,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 9.867638483965015,
      "grad_norm": 0.13697752356529236,
      "learning_rate": 1e-06,
      "loss": -0.0495,
      "num_tokens": 623341684.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.1335780918598175,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 1056
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3519.0,
      "completions/mean_length": 1059.7489013671875,
      "completions/mean_terminated_length": 603.7265625,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 9.876967930029155,
      "grad_norm": 0.14473684132099152,
      "learning_rate": 1e-06,
      "loss": -0.0599,
      "num_tokens": 623899091.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.14180973172187805,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1057
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3030.0,
      "completions/mean_length": 960.9207763671875,
      "completions/mean_terminated_length": 589.0948486328125,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 9.886297376093294,
      "grad_norm": 0.17573131620883942,
      "learning_rate": 1e-06,
      "loss": -0.0561,
      "num_tokens": 624464804.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.1548050194978714,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.4664256274700165,
      "step": 1058
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2810.0,
      "completions/mean_length": 1017.9732666015625,
      "completions/mean_terminated_length": 631.2864379882812,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 9.895626822157434,
      "grad_norm": 0.14279092848300934,
      "learning_rate": 1e-06,
      "loss": -0.0498,
      "num_tokens": 625053820.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.12166837602853775,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 1059
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2843.0,
      "completions/mean_length": 976.1038208007812,
      "completions/mean_terminated_length": 548.5037841796875,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 9.904956268221575,
      "grad_norm": 0.1449451446533203,
      "learning_rate": 1e-06,
      "loss": -0.0513,
      "num_tokens": 625572225.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.12230778485536575,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1060
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0803571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3009.0,
      "completions/mean_length": 809.3482666015625,
      "completions/mean_terminated_length": 522.1650390625,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 9.914285714285715,
      "grad_norm": 0.15559326112270355,
      "learning_rate": 1e-06,
      "loss": -0.0357,
      "num_tokens": 626083113.0,
      "reward": 0.7444196939468384,
      "reward_std": 0.13147373497486115,
      "rewards/verify_math_reward/mean": 0.7444196343421936,
      "rewards/verify_math_reward/std": 0.43643057346343994,
      "step": 1061
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3141.0,
      "completions/mean_length": 915.5792846679688,
      "completions/mean_terminated_length": 577.9049682617188,
      "completions/min_length": 193.0,
      "completions/min_terminated_length": 193.0,
      "epoch": 9.923615160349854,
      "grad_norm": 0.1454796940088272,
      "learning_rate": 1e-06,
      "loss": -0.0399,
      "num_tokens": 626640072.0,
      "reward": 0.7020089626312256,
      "reward_std": 0.135757714509964,
      "rewards/verify_math_reward/mean": 0.7020089030265808,
      "rewards/verify_math_reward/std": 0.45763099193573,
      "step": 1062
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3874.0,
      "completions/mean_length": 903.3516235351562,
      "completions/mean_terminated_length": 573.0775756835938,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 9.932944606413994,
      "grad_norm": 0.16085104644298553,
      "learning_rate": 1e-06,
      "loss": -0.0282,
      "num_tokens": 627201011.0,
      "reward": 0.6930803656578064,
      "reward_std": 0.13602760434150696,
      "rewards/verify_math_reward/mean": 0.6930803656578064,
      "rewards/verify_math_reward/std": 0.46147337555885315,
      "step": 1063
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4053.0,
      "completions/mean_length": 1088.03466796875,
      "completions/mean_terminated_length": 675.775390625,
      "completions/min_length": 185.0,
      "completions/min_terminated_length": 185.0,
      "epoch": 9.942274052478133,
      "grad_norm": 0.17610017955303192,
      "learning_rate": 1e-06,
      "loss": -0.076,
      "num_tokens": 627813362.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.15161874890327454,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 1064
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3562.0,
      "completions/mean_length": 1067.4888916015625,
      "completions/mean_terminated_length": 608.1516723632812,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 9.951603498542275,
      "grad_norm": 0.1570524126291275,
      "learning_rate": 1e-06,
      "loss": -0.0795,
      "num_tokens": 628378408.0,
      "reward": 0.6953125596046448,
      "reward_std": 0.15161871910095215,
      "rewards/verify_math_reward/mean": 0.6953125,
      "rewards/verify_math_reward/std": 0.4605320394039154,
      "step": 1065
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3239.0,
      "completions/mean_length": 933.4364013671875,
      "completions/mean_terminated_length": 593.333740234375,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 9.960932944606414,
      "grad_norm": 0.15946438908576965,
      "learning_rate": 1e-06,
      "loss": -0.056,
      "num_tokens": 628949591.0,
      "reward": 0.65625,
      "reward_std": 0.1517256796360016,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 1066
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1428571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3924.0,
      "completions/mean_length": 1110.5045166015625,
      "completions/mean_terminated_length": 612.921875,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 9.970262390670554,
      "grad_norm": 0.17599667608737946,
      "learning_rate": 1e-06,
      "loss": -0.0912,
      "num_tokens": 629513099.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.17979852855205536,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 1067
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4059.0,
      "completions/mean_length": 910.0491333007812,
      "completions/mean_terminated_length": 589.1056518554688,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 9.979591836734693,
      "grad_norm": 0.13086532056331635,
      "learning_rate": 1e-06,
      "loss": -0.0395,
      "num_tokens": 630071575.0,
      "reward": 0.7377232313156128,
      "reward_std": 0.1244862899184227,
      "rewards/verify_math_reward/mean": 0.7377232313156128,
      "rewards/verify_math_reward/std": 0.4401180148124695,
      "step": 1068
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1517857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3421.0,
      "completions/mean_length": 1142.2132568359375,
      "completions/mean_terminated_length": 613.6408081054688,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 9.988921282798835,
      "grad_norm": 0.14581549167633057,
      "learning_rate": 1e-06,
      "loss": -0.0634,
      "num_tokens": 630642030.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.13955312967300415,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 1069
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1335227272727273,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3067.0,
      "completions/mean_length": 1102.3096923828125,
      "completions/mean_terminated_length": 640.9868774414062,
      "completions/min_length": 210.0,
      "completions/min_terminated_length": 210.0,
      "epoch": 9.998250728862974,
      "grad_norm": 0.14599387347698212,
      "learning_rate": 1e-06,
      "loss": -0.0734,
      "num_tokens": 631205603.0,
      "reward": 0.6953125596046448,
      "reward_std": 0.15421095490455627,
      "rewards/verify_math_reward/mean": 0.6953125,
      "rewards/verify_math_reward/std": 0.4605320394039154,
      "step": 1070
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3081.0,
      "completions/mean_length": 1036.8929443359375,
      "completions/mean_terminated_length": 586.4481811523438,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 10.00932944606414,
      "grad_norm": 0.1592647284269333,
      "learning_rate": 1e-06,
      "loss": -0.0385,
      "num_tokens": 631756123.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.14958925545215607,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.4846802353858948,
      "step": 1071
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3764.0,
      "completions/mean_length": 1067.7421875,
      "completions/mean_terminated_length": 617.3859252929688,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 10.018658892128279,
      "grad_norm": 0.15054309368133545,
      "learning_rate": 1e-06,
      "loss": -0.0666,
      "num_tokens": 632331092.0,
      "reward": 0.625,
      "reward_std": 0.13447962701320648,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1072
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3609.0,
      "completions/mean_length": 1039.235595703125,
      "completions/mean_terminated_length": 598.0932006835938,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 10.02798833819242,
      "grad_norm": 0.15350356698036194,
      "learning_rate": 1e-06,
      "loss": -0.0826,
      "num_tokens": 632890327.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.1584860384464264,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 1073
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3400.0,
      "completions/mean_length": 974.411865234375,
      "completions/mean_terminated_length": 591.0588989257812,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 10.03731778425656,
      "grad_norm": 0.15882743895053864,
      "learning_rate": 1e-06,
      "loss": -0.0523,
      "num_tokens": 633447944.0,
      "reward": 0.7008928656578064,
      "reward_std": 0.14094392955303192,
      "rewards/verify_math_reward/mean": 0.7008928656578064,
      "rewards/verify_math_reward/std": 0.458122581243515,
      "step": 1074
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4084.0,
      "completions/mean_length": 1041.391845703125,
      "completions/mean_terminated_length": 648.9861450195312,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 10.0466472303207,
      "grad_norm": 0.1584683358669281,
      "learning_rate": 1e-06,
      "loss": -0.0762,
      "num_tokens": 634058671.0,
      "reward": 0.6640625,
      "reward_std": 0.15826597809791565,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 1075
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3758.0,
      "completions/mean_length": 876.3125610351562,
      "completions/mean_terminated_length": 564.9840698242188,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 10.055976676384839,
      "grad_norm": 0.15721167623996735,
      "learning_rate": 1e-06,
      "loss": -0.0481,
      "num_tokens": 634596839.0,
      "reward": 0.7645089626312256,
      "reward_std": 0.13655047118663788,
      "rewards/verify_math_reward/mean": 0.7645089030265808,
      "rewards/verify_math_reward/std": 0.42454230785369873,
      "step": 1076
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4074.0,
      "completions/mean_length": 1095.872802734375,
      "completions/mean_terminated_length": 645.2760009765625,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 10.06530612244898,
      "grad_norm": 0.13884396851062775,
      "learning_rate": 1e-06,
      "loss": -0.0713,
      "num_tokens": 635191853.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.1356835663318634,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 1077
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3146.0,
      "completions/mean_length": 1035.4554443359375,
      "completions/mean_terminated_length": 646.6314086914062,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 10.07463556851312,
      "grad_norm": 0.17396743595600128,
      "learning_rate": 1e-06,
      "loss": -0.0641,
      "num_tokens": 635793405.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.17085261642932892,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 1078
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3872.0,
      "completions/mean_length": 1029.828125,
      "completions/mean_terminated_length": 609.5913696289062,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 10.08396501457726,
      "grad_norm": 0.15869788825511932,
      "learning_rate": 1e-06,
      "loss": -0.0717,
      "num_tokens": 636376547.0,
      "reward": 0.652901828289032,
      "reward_std": 0.14278724789619446,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631317377090454,
      "step": 1079
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4086.0,
      "completions/mean_length": 1071.9442138671875,
      "completions/mean_terminated_length": 622.2128295898438,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 10.093294460641399,
      "grad_norm": 0.13492093980312347,
      "learning_rate": 1e-06,
      "loss": -0.0686,
      "num_tokens": 636963745.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.13230746984481812,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 1080
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2528.0,
      "completions/mean_length": 995.6964721679688,
      "completions/mean_terminated_length": 575.2496948242188,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 10.102623906705539,
      "grad_norm": 0.15736635029315948,
      "learning_rate": 1e-06,
      "loss": -0.061,
      "num_tokens": 637510281.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.1522146463394165,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 1081
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2751.0,
      "completions/mean_length": 938.6629638671875,
      "completions/mean_terminated_length": 577.3756103515625,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 10.11195335276968,
      "grad_norm": 0.1429387331008911,
      "learning_rate": 1e-06,
      "loss": -0.0675,
      "num_tokens": 638069763.0,
      "reward": 0.65625,
      "reward_std": 0.13632366061210632,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 1082
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3363.0,
      "completions/mean_length": 952.0335083007812,
      "completions/mean_terminated_length": 613.9307861328125,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 10.12128279883382,
      "grad_norm": 0.1521647572517395,
      "learning_rate": 1e-06,
      "loss": -0.0679,
      "num_tokens": 638666449.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.12651757895946503,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692258834839,
      "step": 1083
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3644.0,
      "completions/mean_length": 1054.157470703125,
      "completions/mean_terminated_length": 624.0369262695312,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 10.130612244897959,
      "grad_norm": 0.14456379413604736,
      "learning_rate": 1e-06,
      "loss": -0.0326,
      "num_tokens": 639250758.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.1328292191028595,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791125416755676,
      "step": 1084
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3379.0,
      "completions/mean_length": 945.0011596679688,
      "completions/mean_terminated_length": 640.3145751953125,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 10.139941690962099,
      "grad_norm": 0.1351221203804016,
      "learning_rate": 1e-06,
      "loss": -0.0504,
      "num_tokens": 639857543.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.13632294535636902,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 1085
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3385.0,
      "completions/mean_length": 887.974365234375,
      "completions/mean_terminated_length": 520.8867797851562,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 10.14927113702624,
      "grad_norm": 0.15145932137966156,
      "learning_rate": 1e-06,
      "loss": -0.0333,
      "num_tokens": 640359888.0,
      "reward": 0.7533482313156128,
      "reward_std": 0.11881474405527115,
      "rewards/verify_math_reward/mean": 0.7533482313156128,
      "rewards/verify_math_reward/std": 0.4313030242919922,
      "step": 1086
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3285.0,
      "completions/mean_length": 1033.5670166015625,
      "completions/mean_terminated_length": 569.0848388671875,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 10.15860058309038,
      "grad_norm": 0.15234391391277313,
      "learning_rate": 1e-06,
      "loss": -0.0441,
      "num_tokens": 640897724.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.11840105801820755,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 1087
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1506696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3239.0,
      "completions/mean_length": 1144.555908203125,
      "completions/mean_terminated_length": 620.9750366210938,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 10.167930029154519,
      "grad_norm": 0.18200331926345825,
      "learning_rate": 1e-06,
      "loss": -0.0862,
      "num_tokens": 641455310.0,
      "reward": 0.6796875596046448,
      "reward_std": 0.1666409969329834,
      "rewards/verify_math_reward/mean": 0.6796875,
      "rewards/verify_math_reward/std": 0.4668572247028351,
      "step": 1088
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3760.0,
      "completions/mean_length": 1143.0279541015625,
      "completions/mean_terminated_length": 655.3458862304688,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 10.177259475218658,
      "grad_norm": 0.15295979380607605,
      "learning_rate": 1e-06,
      "loss": -0.0284,
      "num_tokens": 642050831.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.1215910017490387,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 1089
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2933.0,
      "completions/mean_length": 922.83935546875,
      "completions/mean_terminated_length": 546.4968872070312,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 10.186588921282798,
      "grad_norm": 0.1875612586736679,
      "learning_rate": 1e-06,
      "loss": -0.0659,
      "num_tokens": 642573999.0,
      "reward": 0.7321428656578064,
      "reward_std": 0.14053206145763397,
      "rewards/verify_math_reward/mean": 0.7321428656578064,
      "rewards/verify_math_reward/std": 0.4430900514125824,
      "step": 1090
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4059.0,
      "completions/mean_length": 864.3248291015625,
      "completions/mean_terminated_length": 543.139892578125,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 10.19591836734694,
      "grad_norm": 0.17017853260040283,
      "learning_rate": 1e-06,
      "loss": -0.0586,
      "num_tokens": 643100810.0,
      "reward": 0.7399553656578064,
      "reward_std": 0.14109547436237335,
      "rewards/verify_math_reward/mean": 0.7399553656578064,
      "rewards/verify_math_reward/std": 0.43890368938446045,
      "step": 1091
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1573660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3600.0,
      "completions/mean_length": 1183.9296875,
      "completions/mean_terminated_length": 640.0861206054688,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 10.205247813411079,
      "grad_norm": 0.15581268072128296,
      "learning_rate": 1e-06,
      "loss": -0.0602,
      "num_tokens": 643675971.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.16871143877506256,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 1092
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1272321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3823.0,
      "completions/mean_length": 1058.7679443359375,
      "completions/mean_terminated_length": 616.0,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 10.214577259475218,
      "grad_norm": 0.15681420266628265,
      "learning_rate": 1e-06,
      "loss": -0.0534,
      "num_tokens": 644240875.0,
      "reward": 0.6484375,
      "reward_std": 0.127794548869133,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 1093
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1372767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2770.0,
      "completions/mean_length": 1101.3226318359375,
      "completions/mean_terminated_length": 624.8085327148438,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 10.223906705539358,
      "grad_norm": 0.1676253229379654,
      "learning_rate": 1e-06,
      "loss": -0.0415,
      "num_tokens": 644820436.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.1525605320930481,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 1094
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2845.0,
      "completions/mean_length": 987.5324096679688,
      "completions/mean_terminated_length": 601.4115600585938,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 10.2332361516035,
      "grad_norm": 0.16065557301044464,
      "learning_rate": 1e-06,
      "loss": -0.0608,
      "num_tokens": 645383737.0,
      "reward": 0.668526828289032,
      "reward_std": 0.15416745841503143,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056483745575,
      "step": 1095
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1897321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2385.0,
      "completions/mean_length": 1279.7366943359375,
      "completions/mean_terminated_length": 620.281005859375,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 10.242565597667639,
      "grad_norm": 0.18482163548469543,
      "learning_rate": 1e-06,
      "loss": -0.086,
      "num_tokens": 645924821.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.15454541146755219,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 1096
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3035.0,
      "completions/mean_length": 911.0859985351562,
      "completions/mean_terminated_length": 590.2468872070312,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 10.251895043731778,
      "grad_norm": 0.13147014379501343,
      "learning_rate": 1e-06,
      "loss": -0.0463,
      "num_tokens": 646491762.0,
      "reward": 0.7243303656578064,
      "reward_std": 0.10957279056310654,
      "rewards/verify_math_reward/mean": 0.7243303656578064,
      "rewards/verify_math_reward/std": 0.4471006691455841,
      "step": 1097
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1517857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3002.0,
      "completions/mean_length": 1121.532470703125,
      "completions/mean_terminated_length": 589.2592163085938,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 10.261224489795918,
      "grad_norm": 0.18821877241134644,
      "learning_rate": 1e-06,
      "loss": -0.0758,
      "num_tokens": 647025767.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.14563976228237152,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1098
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2475.0,
      "completions/mean_length": 994.2120971679688,
      "completions/mean_terminated_length": 555.6152954101562,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 10.270553935860057,
      "grad_norm": 0.15949542820453644,
      "learning_rate": 1e-06,
      "loss": -0.0513,
      "num_tokens": 647555245.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.1250917762517929,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1099
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1395089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3541.0,
      "completions/mean_length": 1059.86279296875,
      "completions/mean_terminated_length": 567.62255859375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 10.279883381924199,
      "grad_norm": 0.15890197455883026,
      "learning_rate": 1e-06,
      "loss": -0.068,
      "num_tokens": 648094338.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.14369018375873566,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791128396987915,
      "step": 1100
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3352.0,
      "completions/mean_length": 900.8761596679688,
      "completions/mean_terminated_length": 574.6826171875,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 10.289212827988338,
      "grad_norm": 0.1462251842021942,
      "learning_rate": 1e-06,
      "loss": -0.0407,
      "num_tokens": 648651491.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.15082526206970215,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.462861567735672,
      "step": 1101
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4056.0,
      "completions/mean_length": 1093.40185546875,
      "completions/mean_terminated_length": 629.0824584960938,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 10.298542274052478,
      "grad_norm": 0.17598183453083038,
      "learning_rate": 1e-06,
      "loss": -0.1181,
      "num_tokens": 649232307.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.18468095362186432,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975659370422363,
      "step": 1102
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3879.0,
      "completions/mean_length": 1065.2020263671875,
      "completions/mean_terminated_length": 614.4679565429688,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 10.307871720116617,
      "grad_norm": 0.16817888617515564,
      "learning_rate": 1e-06,
      "loss": -0.0647,
      "num_tokens": 649804608.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.15774603188037872,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 1103
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3969.0,
      "completions/mean_length": 1103.15625,
      "completions/mean_terminated_length": 640.3453369140625,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 10.317201166180759,
      "grad_norm": 0.13881205022335052,
      "learning_rate": 1e-06,
      "loss": -0.0595,
      "num_tokens": 650385860.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.13387976586818695,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 1104
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2864.0,
      "completions/mean_length": 1060.6663818359375,
      "completions/mean_terminated_length": 591.2847900390625,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 10.326530612244898,
      "grad_norm": 0.15930691361427307,
      "learning_rate": 1e-06,
      "loss": -0.101,
      "num_tokens": 650929385.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.14210577309131622,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 1105
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2614.0,
      "completions/mean_length": 991.09716796875,
      "completions/mean_terminated_length": 614.1564331054688,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 10.335860058309038,
      "grad_norm": 0.16268374025821686,
      "learning_rate": 1e-06,
      "loss": -0.0545,
      "num_tokens": 651504872.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.1695399135351181,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485536336898804,
      "step": 1106
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2973.0,
      "completions/mean_length": 817.107177734375,
      "completions/mean_terminated_length": 543.5357055664062,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 10.345189504373177,
      "grad_norm": 0.16051194071769714,
      "learning_rate": 1e-06,
      "loss": -0.0424,
      "num_tokens": 652040960.0,
      "reward": 0.7087053656578064,
      "reward_std": 0.13883958756923676,
      "rewards/verify_math_reward/mean": 0.7087053656578064,
      "rewards/verify_math_reward/std": 0.45461276173591614,
      "step": 1107
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3574.0,
      "completions/mean_length": 951.17529296875,
      "completions/mean_terminated_length": 586.9551391601562,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 10.354518950437317,
      "grad_norm": 0.1385965794324875,
      "learning_rate": 1e-06,
      "loss": -0.0719,
      "num_tokens": 652596197.0,
      "reward": 0.7176339626312256,
      "reward_std": 0.14011837542057037,
      "rewards/verify_math_reward/mean": 0.7176339030265808,
      "rewards/verify_math_reward/std": 0.4504019320011139,
      "step": 1108
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3001.0,
      "completions/mean_length": 906.2500610351562,
      "completions/mean_terminated_length": 563.2237548828125,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 10.363848396501458,
      "grad_norm": 0.15858794748783112,
      "learning_rate": 1e-06,
      "loss": -0.0595,
      "num_tokens": 653139237.0,
      "reward": 0.7187500596046448,
      "reward_std": 0.1417366862297058,
      "rewards/verify_math_reward/mean": 0.71875,
      "rewards/verify_math_reward/std": 0.4498603343963623,
      "step": 1109
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4081.0,
      "completions/mean_length": 1086.62841796875,
      "completions/mean_terminated_length": 594.1857299804688,
      "completions/min_length": 177.0,
      "completions/min_terminated_length": 177.0,
      "epoch": 10.373177842565598,
      "grad_norm": 0.14364366233348846,
      "learning_rate": 1e-06,
      "loss": -0.0691,
      "num_tokens": 653681856.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.12790516018867493,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 1110
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3847.0,
      "completions/mean_length": 1050.5703125,
      "completions/mean_terminated_length": 593.1694946289062,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 10.382507288629737,
      "grad_norm": 0.14082437753677368,
      "learning_rate": 1e-06,
      "loss": -0.0634,
      "num_tokens": 654217687.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.12117871642112732,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1111
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3709.0,
      "completions/mean_length": 992.2913208007812,
      "completions/mean_terminated_length": 562.4256591796875,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 10.391836734693877,
      "grad_norm": 0.16474808752536774,
      "learning_rate": 1e-06,
      "loss": -0.0607,
      "num_tokens": 654745836.0,
      "reward": 0.7299107313156128,
      "reward_std": 0.1471807062625885,
      "rewards/verify_math_reward/mean": 0.7299107313156128,
      "rewards/verify_math_reward/std": 0.44425368309020996,
      "step": 1112
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1752232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3031.0,
      "completions/mean_length": 1236.1317138671875,
      "completions/mean_terminated_length": 628.5547485351562,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 10.401166180758018,
      "grad_norm": 0.1435873955488205,
      "learning_rate": 1e-06,
      "loss": -0.0601,
      "num_tokens": 655310090.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.09596949070692062,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 1113
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2884.0,
      "completions/mean_length": 966.1886596679688,
      "completions/mean_terminated_length": 577.4165649414062,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 10.410495626822158,
      "grad_norm": 0.14196231961250305,
      "learning_rate": 1e-06,
      "loss": -0.071,
      "num_tokens": 655864659.0,
      "reward": 0.7087053656578064,
      "reward_std": 0.13816556334495544,
      "rewards/verify_math_reward/mean": 0.7087053656578064,
      "rewards/verify_math_reward/std": 0.45461276173591614,
      "step": 1114
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2549.0,
      "completions/mean_length": 1070.321533203125,
      "completions/mean_terminated_length": 584.3316040039062,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 10.419825072886297,
      "grad_norm": 0.1511499583721161,
      "learning_rate": 1e-06,
      "loss": -0.0889,
      "num_tokens": 656395059.0,
      "reward": 0.7042410969734192,
      "reward_std": 0.1297919601202011,
      "rewards/verify_math_reward/mean": 0.7042410969734192,
      "rewards/verify_math_reward/std": 0.45663803815841675,
      "step": 1115
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1495535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3452.0,
      "completions/mean_length": 1130.4007568359375,
      "completions/mean_terminated_length": 608.8910522460938,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 10.429154518950437,
      "grad_norm": 0.18190579116344452,
      "learning_rate": 1e-06,
      "loss": -0.0725,
      "num_tokens": 656951290.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.1635277271270752,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644899368286,
      "step": 1116
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3742.0,
      "completions/mean_length": 944.6239013671875,
      "completions/mean_terminated_length": 592.7332153320312,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 10.438483965014576,
      "grad_norm": 0.1650150716304779,
      "learning_rate": 1e-06,
      "loss": -0.0663,
      "num_tokens": 657508921.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.1522895097732544,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613664388656616,
      "step": 1117
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1272321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2980.0,
      "completions/mean_length": 1069.6160888671875,
      "completions/mean_terminated_length": 628.4296875,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 10.447813411078718,
      "grad_norm": 0.16112597286701202,
      "learning_rate": 1e-06,
      "loss": -0.0687,
      "num_tokens": 658091625.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.16022199392318726,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 1118
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1428571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2301.0,
      "completions/mean_length": 1105.52685546875,
      "completions/mean_terminated_length": 607.1146240234375,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 10.457142857142857,
      "grad_norm": 0.36504054069519043,
      "learning_rate": 1e-06,
      "loss": -0.0644,
      "num_tokens": 658651041.0,
      "reward": 0.598214328289032,
      "reward_std": 0.14740823209285736,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 1119
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1584821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3316.0,
      "completions/mean_length": 1135.7176513671875,
      "completions/mean_terminated_length": 578.2108764648438,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 10.466472303206997,
      "grad_norm": 0.13417407870292664,
      "learning_rate": 1e-06,
      "loss": -0.092,
      "num_tokens": 659178388.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.1305733323097229,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 1120
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2335.0,
      "completions/mean_length": 1073.161865234375,
      "completions/mean_terminated_length": 596.6937866210938,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 10.475801749271136,
      "grad_norm": 0.14503762125968933,
      "learning_rate": 1e-06,
      "loss": -0.0713,
      "num_tokens": 659731205.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.1402692198753357,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 1121
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3090.0,
      "completions/mean_length": 987.4609985351562,
      "completions/mean_terminated_length": 561.41748046875,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 10.485131195335278,
      "grad_norm": 0.17859601974487305,
      "learning_rate": 1e-06,
      "loss": -0.074,
      "num_tokens": 660254234.0,
      "reward": 0.7332589626312256,
      "reward_std": 0.14316701889038086,
      "rewards/verify_math_reward/mean": 0.7332589030265808,
      "rewards/verify_math_reward/std": 0.4425029158592224,
      "step": 1122
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3738.0,
      "completions/mean_length": 976.4375610351562,
      "completions/mean_terminated_length": 615.1431884765625,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 10.494460641399417,
      "grad_norm": 0.1336556077003479,
      "learning_rate": 1e-06,
      "loss": -0.0693,
      "num_tokens": 660832290.0,
      "reward": 0.6930803656578064,
      "reward_std": 0.1184028759598732,
      "rewards/verify_math_reward/mean": 0.6930803656578064,
      "rewards/verify_math_reward/std": 0.46147337555885315,
      "step": 1123
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3536.0,
      "completions/mean_length": 992.0748291015625,
      "completions/mean_terminated_length": 566.6636962890625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 10.503790087463557,
      "grad_norm": 0.15979568660259247,
      "learning_rate": 1e-06,
      "loss": -0.0559,
      "num_tokens": 661364725.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.1386127471923828,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1124
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3947.0,
      "completions/mean_length": 911.8973388671875,
      "completions/mean_terminated_length": 565.1138305664062,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 10.513119533527696,
      "grad_norm": 0.13665249943733215,
      "learning_rate": 1e-06,
      "loss": -0.0603,
      "num_tokens": 661901561.0,
      "reward": 0.7120535969734192,
      "reward_std": 0.13264445960521698,
      "rewards/verify_math_reward/mean": 0.7120535969734192,
      "rewards/verify_math_reward/std": 0.4530589282512665,
      "step": 1125
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4092.0,
      "completions/mean_length": 977.8359985351562,
      "completions/mean_terminated_length": 563.9203491210938,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 10.522448979591836,
      "grad_norm": 0.16451455652713776,
      "learning_rate": 1e-06,
      "loss": -0.0352,
      "num_tokens": 662453406.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.12163377553224564,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.46100425720214844,
      "step": 1126
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2797.0,
      "completions/mean_length": 929.3739013671875,
      "completions/mean_terminated_length": 536.0288696289062,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 10.531778425655977,
      "grad_norm": 0.16072551906108856,
      "learning_rate": 1e-06,
      "loss": -0.0569,
      "num_tokens": 662972461.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.13929423689842224,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 1127
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1506696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3121.0,
      "completions/mean_length": 1076.204345703125,
      "completions/mean_terminated_length": 540.4979858398438,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 10.541107871720117,
      "grad_norm": 0.1522071361541748,
      "learning_rate": 1e-06,
      "loss": -0.061,
      "num_tokens": 663485268.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.13711389899253845,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.46896928548812866,
      "step": 1128
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3468.0,
      "completions/mean_length": 970.818115234375,
      "completions/mean_terminated_length": 564.8991088867188,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 10.550437317784256,
      "grad_norm": 0.18777716159820557,
      "learning_rate": 1e-06,
      "loss": -0.06,
      "num_tokens": 664022017.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.1272311508655548,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 1129
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3026.0,
      "completions/mean_length": 964.97216796875,
      "completions/mean_terminated_length": 597.9937744140625,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 10.559766763848396,
      "grad_norm": 0.1461143046617508,
      "learning_rate": 1e-06,
      "loss": -0.0512,
      "num_tokens": 664588520.0,
      "reward": 0.723214328289032,
      "reward_std": 0.13519570231437683,
      "rewards/verify_math_reward/mean": 0.7232142686843872,
      "rewards/verify_math_reward/std": 0.44765952229499817,
      "step": 1130
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3775.0,
      "completions/mean_length": 907.4029541015625,
      "completions/mean_terminated_length": 568.8605346679688,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 10.569096209912537,
      "grad_norm": 0.1546100229024887,
      "learning_rate": 1e-06,
      "loss": -0.0612,
      "num_tokens": 665129761.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.10772736370563507,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331799030303955,
      "step": 1131
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1674107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4043.0,
      "completions/mean_length": 1245.7489013671875,
      "completions/mean_terminated_length": 672.64208984375,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 10.578425655976677,
      "grad_norm": 0.17100751399993896,
      "learning_rate": 1e-06,
      "loss": -0.0992,
      "num_tokens": 665725400.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.1565767079591751,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 1132
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3373.0,
      "completions/mean_length": 957.8136596679688,
      "completions/mean_terminated_length": 607.395751953125,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 10.587755102040816,
      "grad_norm": 0.1777399778366089,
      "learning_rate": 1e-06,
      "loss": -0.0276,
      "num_tokens": 666313881.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.14007559418678284,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 1133
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4002.0,
      "completions/mean_length": 1098.4967041015625,
      "completions/mean_terminated_length": 634.9652099609375,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 10.597084548104956,
      "grad_norm": 0.1605757772922516,
      "learning_rate": 1e-06,
      "loss": -0.0571,
      "num_tokens": 666896446.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.16044881939888,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.48468026518821716,
      "step": 1134
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3384.0,
      "completions/mean_length": 976.9855346679688,
      "completions/mean_terminated_length": 580.7333374023438,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 10.606413994169095,
      "grad_norm": 0.17219507694244385,
      "learning_rate": 1e-06,
      "loss": -0.0469,
      "num_tokens": 667443337.0,
      "reward": 0.7254464626312256,
      "reward_std": 0.14037981629371643,
      "rewards/verify_math_reward/mean": 0.7254464030265808,
      "rewards/verify_math_reward/std": 0.4465382993221283,
      "step": 1135
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1439732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2925.0,
      "completions/mean_length": 1091.5045166015625,
      "completions/mean_terminated_length": 586.1851196289062,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 10.615743440233237,
      "grad_norm": 0.15457606315612793,
      "learning_rate": 1e-06,
      "loss": -0.0748,
      "num_tokens": 667980669.0,
      "reward": 0.660714328289032,
      "reward_std": 0.14992374181747437,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 1136
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1551339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2700.0,
      "completions/mean_length": 1113.646240234375,
      "completions/mean_terminated_length": 566.0277709960938,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 10.625072886297376,
      "grad_norm": 0.1607721745967865,
      "learning_rate": 1e-06,
      "loss": -0.0575,
      "num_tokens": 668505952.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.13632294535636902,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 1137
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4069.0,
      "completions/mean_length": 918.1785888671875,
      "completions/mean_terminated_length": 589.4384155273438,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 10.634402332361516,
      "grad_norm": 0.15498511493206024,
      "learning_rate": 1e-06,
      "loss": -0.0545,
      "num_tokens": 669076512.0,
      "reward": 0.7276785969734192,
      "reward_std": 0.12373882532119751,
      "rewards/verify_math_reward/mean": 0.7276785969734192,
      "rewards/verify_math_reward/std": 0.4454030692577362,
      "step": 1138
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2971.0,
      "completions/mean_length": 975.7857666015625,
      "completions/mean_terminated_length": 588.2057495117188,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 10.643731778425655,
      "grad_norm": 0.14636258780956268,
      "learning_rate": 1e-06,
      "loss": -0.0479,
      "num_tokens": 669622448.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.11126275360584259,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1139
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1506696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3205.0,
      "completions/mean_length": 1157.4107666015625,
      "completions/mean_terminated_length": 636.1103515625,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 10.653061224489797,
      "grad_norm": 0.1539439857006073,
      "learning_rate": 1e-06,
      "loss": -0.058,
      "num_tokens": 670205104.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.14914487302303314,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 1140
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1473214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3847.0,
      "completions/mean_length": 1152.1160888671875,
      "completions/mean_terminated_length": 643.4869384765625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 10.662390670553936,
      "grad_norm": 0.12876634299755096,
      "learning_rate": 1e-06,
      "loss": -0.0878,
      "num_tokens": 670780664.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.13609471917152405,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 1141
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2911.0,
      "completions/mean_length": 1046.185302734375,
      "completions/mean_terminated_length": 610.4974365234375,
      "completions/min_length": 184.0,
      "completions/min_terminated_length": 184.0,
      "epoch": 10.671720116618076,
      "grad_norm": 0.15197999775409698,
      "learning_rate": 1e-06,
      "loss": -0.0876,
      "num_tokens": 671353886.0,
      "reward": 0.715401828289032,
      "reward_std": 0.1395556628704071,
      "rewards/verify_math_reward/mean": 0.7154017686843872,
      "rewards/verify_math_reward/std": 0.4514748752117157,
      "step": 1142
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3132.0,
      "completions/mean_length": 1013.1116333007812,
      "completions/mean_terminated_length": 545.5269775390625,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 10.681049562682215,
      "grad_norm": 0.15575233101844788,
      "learning_rate": 1e-06,
      "loss": -0.0595,
      "num_tokens": 671877178.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.12102645635604858,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.4610042870044708,
      "step": 1143
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4001.0,
      "completions/mean_length": 1055.6585693359375,
      "completions/mean_terminated_length": 585.5025634765625,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 10.690379008746355,
      "grad_norm": 0.1572081446647644,
      "learning_rate": 1e-06,
      "loss": -0.0793,
      "num_tokens": 672421184.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.15962213277816772,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1144
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2792.0,
      "completions/mean_length": 1030.509033203125,
      "completions/mean_terminated_length": 579.1242065429688,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 10.699708454810496,
      "grad_norm": 0.18195772171020508,
      "learning_rate": 1e-06,
      "loss": -0.0902,
      "num_tokens": 672962536.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.14579172432422638,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 1145
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3729.0,
      "completions/mean_length": 1003.8683471679688,
      "completions/mean_terminated_length": 611.0314331054688,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 10.709037900874636,
      "grad_norm": 0.15055854618549347,
      "learning_rate": 1e-06,
      "loss": -0.0471,
      "num_tokens": 673530002.0,
      "reward": 0.6640625,
      "reward_std": 0.13470645248889923,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 1146
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1395089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3551.0,
      "completions/mean_length": 1094.1551513671875,
      "completions/mean_terminated_length": 607.4747314453125,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 10.718367346938775,
      "grad_norm": 0.13042044639587402,
      "learning_rate": 1e-06,
      "loss": -0.0335,
      "num_tokens": 674088045.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.09532869607210159,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.4600565731525421,
      "step": 1147
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2898.0,
      "completions/mean_length": 1082.0,
      "completions/mean_terminated_length": 611.4271240234375,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 10.727696793002915,
      "grad_norm": 0.1522504836320877,
      "learning_rate": 1e-06,
      "loss": -0.075,
      "num_tokens": 674647589.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.14687760174274445,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1148
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2928.0,
      "completions/mean_length": 1041.966552734375,
      "completions/mean_terminated_length": 632.184814453125,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 10.737026239067056,
      "grad_norm": 0.15824255347251892,
      "learning_rate": 1e-06,
      "loss": -0.1141,
      "num_tokens": 675232543.0,
      "reward": 0.707589328289032,
      "reward_std": 0.17362776398658752,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 1149
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3480.0,
      "completions/mean_length": 1091.946533203125,
      "completions/mean_terminated_length": 636.3187866210938,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 10.746355685131196,
      "grad_norm": 0.14918987452983856,
      "learning_rate": 1e-06,
      "loss": -0.0837,
      "num_tokens": 675818879.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.174875870347023,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 1150
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4070.0,
      "completions/mean_length": 1094.774658203125,
      "completions/mean_terminated_length": 621.7131958007812,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 10.755685131195335,
      "grad_norm": 0.15345510840415955,
      "learning_rate": 1e-06,
      "loss": -0.0424,
      "num_tokens": 676390605.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.13902321457862854,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 1151
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1573660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3995.0,
      "completions/mean_length": 1161.8660888671875,
      "completions/mean_terminated_length": 613.9019775390625,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 10.765014577259475,
      "grad_norm": 0.14456294476985931,
      "learning_rate": 1e-06,
      "loss": -0.0509,
      "num_tokens": 676948021.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.11062336713075638,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 1152
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3539.0,
      "completions/mean_length": 989.404052734375,
      "completions/mean_terminated_length": 581.4671630859375,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 10.774344023323614,
      "grad_norm": 0.13233256340026855,
      "learning_rate": 1e-06,
      "loss": -0.0837,
      "num_tokens": 677499959.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.12636421620845795,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.4600565731525421,
      "step": 1153
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3740.0,
      "completions/mean_length": 1012.05029296875,
      "completions/mean_terminated_length": 624.6193237304688,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 10.783673469387756,
      "grad_norm": 0.16096143424510956,
      "learning_rate": 1e-06,
      "loss": -0.0545,
      "num_tokens": 678085556.0,
      "reward": 0.707589328289032,
      "reward_std": 0.1534174531698227,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 1154
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3837.0,
      "completions/mean_length": 961.6808471679688,
      "completions/mean_terminated_length": 603.02734375,
      "completions/min_length": 177.0,
      "completions/min_terminated_length": 177.0,
      "epoch": 10.793002915451895,
      "grad_norm": 0.14721722900867462,
      "learning_rate": 1e-06,
      "loss": -0.0535,
      "num_tokens": 678657694.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.13729682564735413,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 1155
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3112.0,
      "completions/mean_length": 1073.1953125,
      "completions/mean_terminated_length": 587.6670532226562,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 10.802332361516035,
      "grad_norm": 0.15671247243881226,
      "learning_rate": 1e-06,
      "loss": -0.0454,
      "num_tokens": 679202237.0,
      "reward": 0.645089328289032,
      "reward_std": 0.1281326860189438,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 1156
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3751.0,
      "completions/mean_length": 1038.360595703125,
      "completions/mean_terminated_length": 583.6346435546875,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 10.811661807580174,
      "grad_norm": 0.18394167721271515,
      "learning_rate": 1e-06,
      "loss": -0.102,
      "num_tokens": 679755992.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.15533748269081116,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 1157
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2841.0,
      "completions/mean_length": 1051.384033203125,
      "completions/mean_terminated_length": 580.5670166015625,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 10.820991253644316,
      "grad_norm": 0.16373591125011444,
      "learning_rate": 1e-06,
      "loss": -0.044,
      "num_tokens": 680296296.0,
      "reward": 0.6796875596046448,
      "reward_std": 0.13203756511211395,
      "rewards/verify_math_reward/mean": 0.6796875,
      "rewards/verify_math_reward/std": 0.4668572247028351,
      "step": 1158
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0680803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3278.0,
      "completions/mean_length": 790.7199096679688,
      "completions/mean_terminated_length": 549.2562866210938,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 10.830320699708455,
      "grad_norm": 0.15067870914936066,
      "learning_rate": 1e-06,
      "loss": -0.0323,
      "num_tokens": 680840077.0,
      "reward": 0.7812500596046448,
      "reward_std": 0.10919371992349625,
      "rewards/verify_math_reward/mean": 0.78125,
      "rewards/verify_math_reward/std": 0.41362953186035156,
      "step": 1159
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1450892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3370.0,
      "completions/mean_length": 1111.4364013671875,
      "completions/mean_terminated_length": 604.9177856445312,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 10.839650145772595,
      "grad_norm": 0.1591438353061676,
      "learning_rate": 1e-06,
      "loss": -0.1107,
      "num_tokens": 681397252.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.16288693249225616,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219157218933105,
      "step": 1160
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4014.0,
      "completions/mean_length": 967.5558471679688,
      "completions/mean_terminated_length": 570.1056518554688,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 10.848979591836734,
      "grad_norm": 0.17167727649211884,
      "learning_rate": 1e-06,
      "loss": -0.0579,
      "num_tokens": 681932534.0,
      "reward": 0.715401828289032,
      "reward_std": 0.16150008141994476,
      "rewards/verify_math_reward/mean": 0.7154017686843872,
      "rewards/verify_math_reward/std": 0.4514748752117157,
      "step": 1161
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3729.0,
      "completions/mean_length": 932.87841796875,
      "completions/mean_terminated_length": 618.5067749023438,
      "completions/min_length": 196.0,
      "completions/min_terminated_length": 196.0,
      "epoch": 10.858309037900874,
      "grad_norm": 0.15950097143650055,
      "learning_rate": 1e-06,
      "loss": -0.0563,
      "num_tokens": 682524241.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.15431900322437286,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.4610042870044708,
      "step": 1162
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0714285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3398.0,
      "completions/mean_length": 822.8873291015625,
      "completions/mean_terminated_length": 571.109375,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 10.867638483965015,
      "grad_norm": 0.1505705863237381,
      "learning_rate": 1e-06,
      "loss": -0.037,
      "num_tokens": 683089084.0,
      "reward": 0.7265625596046448,
      "reward_std": 0.14496758580207825,
      "rewards/verify_math_reward/mean": 0.7265625,
      "rewards/verify_math_reward/std": 0.4459724426269531,
      "step": 1163
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1372767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4044.0,
      "completions/mean_length": 1105.24560546875,
      "completions/mean_terminated_length": 629.3557739257812,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 10.876967930029155,
      "grad_norm": 0.1730506718158722,
      "learning_rate": 1e-06,
      "loss": -0.032,
      "num_tokens": 683657912.0,
      "reward": 0.645089328289032,
      "reward_std": 0.12839369475841522,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 1164
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3497.0,
      "completions/mean_length": 1048.118408203125,
      "completions/mean_terminated_length": 581.3256225585938,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 10.886297376093294,
      "grad_norm": 0.15575364232063293,
      "learning_rate": 1e-06,
      "loss": -0.0605,
      "num_tokens": 684194818.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.12790516018867493,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613667368888855,
      "step": 1165
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3066.0,
      "completions/mean_length": 960.5100708007812,
      "completions/mean_terminated_length": 588.6354370117188,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 10.895626822157434,
      "grad_norm": 0.152786985039711,
      "learning_rate": 1e-06,
      "loss": -0.0338,
      "num_tokens": 684754787.0,
      "reward": 0.7433035969734192,
      "reward_std": 0.11073465645313263,
      "rewards/verify_math_reward/mean": 0.7433035969734192,
      "rewards/verify_math_reward/std": 0.43705442547798157,
      "step": 1166
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2268.0,
      "completions/mean_length": 920.9933471679688,
      "completions/mean_terminated_length": 575.2005004882812,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 10.904956268221575,
      "grad_norm": 0.16257745027542114,
      "learning_rate": 1e-06,
      "loss": -0.0529,
      "num_tokens": 685307397.0,
      "reward": 0.7399553656578064,
      "reward_std": 0.14091001451015472,
      "rewards/verify_math_reward/mean": 0.7399553656578064,
      "rewards/verify_math_reward/std": 0.43890365958213806,
      "step": 1167
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2327.0,
      "completions/mean_length": 924.6094360351562,
      "completions/mean_terminated_length": 512.6885375976562,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 10.914285714285715,
      "grad_norm": 0.16915538907051086,
      "learning_rate": 1e-06,
      "loss": -0.0461,
      "num_tokens": 685805711.0,
      "reward": 0.7254464626312256,
      "reward_std": 0.12343572080135345,
      "rewards/verify_math_reward/mean": 0.7254464030265808,
      "rewards/verify_math_reward/std": 0.4465382993221283,
      "step": 1168
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1484375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2432.0,
      "completions/mean_length": 1178.3638916015625,
      "completions/mean_terminated_length": 669.7850952148438,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 10.923615160349854,
      "grad_norm": 0.17527371644973755,
      "learning_rate": 1e-06,
      "loss": -0.0376,
      "num_tokens": 686402853.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.17652417719364166,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 1169
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3981.0,
      "completions/mean_length": 988.1629638671875,
      "completions/mean_terminated_length": 557.7255249023438,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 10.932944606413994,
      "grad_norm": 0.1803063005208969,
      "learning_rate": 1e-06,
      "loss": -0.0959,
      "num_tokens": 686933031.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.17908315360546112,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 1170
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3868.0,
      "completions/mean_length": 1003.6473388671875,
      "completions/mean_terminated_length": 606.3929443359375,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 10.942274052478133,
      "grad_norm": 0.16941961646080017,
      "learning_rate": 1e-06,
      "loss": -0.0566,
      "num_tokens": 687510931.0,
      "reward": 0.609375,
      "reward_std": 0.16378848254680634,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 1171
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1517857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3778.0,
      "completions/mean_length": 1172.798095703125,
      "completions/mean_terminated_length": 649.69873046875,
      "completions/min_length": 183.0,
      "completions/min_terminated_length": 183.0,
      "epoch": 10.951603498542275,
      "grad_norm": 0.15194791555404663,
      "learning_rate": 1e-06,
      "loss": -0.0693,
      "num_tokens": 688095086.0,
      "reward": 0.606026828289032,
      "reward_std": 0.1280987709760666,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 1172
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3887.0,
      "completions/mean_length": 1112.8226318359375,
      "completions/mean_terminated_length": 686.654296875,
      "completions/min_length": 181.0,
      "completions/min_terminated_length": 181.0,
      "epoch": 10.960932944606414,
      "grad_norm": 0.14911498129367828,
      "learning_rate": 1e-06,
      "loss": -0.0518,
      "num_tokens": 688714951.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.17217238247394562,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 1173
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3428.0,
      "completions/mean_length": 993.4163208007812,
      "completions/mean_terminated_length": 629.7718505859375,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 10.970262390670554,
      "grad_norm": 0.1688496470451355,
      "learning_rate": 1e-06,
      "loss": -0.0539,
      "num_tokens": 689305444.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.1675853133201599,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 1174
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3238.0,
      "completions/mean_length": 907.755615234375,
      "completions/mean_terminated_length": 624.958740234375,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 10.979591836734693,
      "grad_norm": 0.1482262909412384,
      "learning_rate": 1e-06,
      "loss": -0.0588,
      "num_tokens": 689912785.0,
      "reward": 0.7220982313156128,
      "reward_std": 0.14905862510204315,
      "rewards/verify_math_reward/mean": 0.7220982313156128,
      "rewards/verify_math_reward/std": 0.44821488857269287,
      "step": 1175
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3820.0,
      "completions/mean_length": 927.6172485351562,
      "completions/mean_terminated_length": 595.5425415039062,
      "completions/min_length": 179.0,
      "completions/min_terminated_length": 179.0,
      "epoch": 10.988921282798835,
      "grad_norm": 0.17880500853061676,
      "learning_rate": 1e-06,
      "loss": -0.0766,
      "num_tokens": 690485650.0,
      "reward": 0.7165178656578064,
      "reward_std": 0.1725853532552719,
      "rewards/verify_math_reward/mean": 0.7165178656578064,
      "rewards/verify_math_reward/std": 0.4509401023387909,
      "step": 1176
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.09659090909090906,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3607.0,
      "completions/mean_length": 1018.5540161132812,
      "completions/mean_terminated_length": 689.5188598632812,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 10.998250728862974,
      "grad_norm": 0.16327017545700073,
      "learning_rate": 1e-06,
      "loss": -0.0616,
      "num_tokens": 691083697.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.13665924966335297,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159480571747,
      "step": 1177
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2466.0,
      "completions/mean_length": 1012.38623046875,
      "completions/mean_terminated_length": 624.9974975585938,
      "completions/min_length": 202.0,
      "completions/min_terminated_length": 202.0,
      "epoch": 11.00932944606414,
      "grad_norm": 0.15496833622455597,
      "learning_rate": 1e-06,
      "loss": -0.0673,
      "num_tokens": 691674891.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.13673663139343262,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 1178
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1439732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3647.0,
      "completions/mean_length": 1168.1015625,
      "completions/mean_terminated_length": 675.6649169921875,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 11.018658892128279,
      "grad_norm": 0.14826743304729462,
      "learning_rate": 1e-06,
      "loss": -0.0581,
      "num_tokens": 692284598.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.15165013074874878,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 1179
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3563.0,
      "completions/mean_length": 1042.72998046875,
      "completions/mean_terminated_length": 566.0258178710938,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 11.02798833819242,
      "grad_norm": 0.18478870391845703,
      "learning_rate": 1e-06,
      "loss": -0.0947,
      "num_tokens": 692804636.0,
      "reward": 0.6640625,
      "reward_std": 0.1838906854391098,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 1180
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1272321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3737.0,
      "completions/mean_length": 1021.7288208007812,
      "completions/mean_terminated_length": 573.5614013671875,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 11.03731778425656,
      "grad_norm": 0.1643456667661667,
      "learning_rate": 1e-06,
      "loss": -0.0697,
      "num_tokens": 693339313.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.1612725704908371,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 1181
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3494.0,
      "completions/mean_length": 1022.122802734375,
      "completions/mean_terminated_length": 627.2417602539062,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 11.0466472303207,
      "grad_norm": 0.15247948467731476,
      "learning_rate": 1e-06,
      "loss": -0.0473,
      "num_tokens": 693923807.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.15157341957092285,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 1182
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3656.0,
      "completions/mean_length": 1027.6607666015625,
      "completions/mean_terminated_length": 584.8480224609375,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 11.055976676384839,
      "grad_norm": 0.1399637758731842,
      "learning_rate": 1e-06,
      "loss": -0.0477,
      "num_tokens": 694471127.0,
      "reward": 0.6975446939468384,
      "reward_std": 0.11753343045711517,
      "rewards/verify_math_reward/mean": 0.6975446343421936,
      "rewards/verify_math_reward/std": 0.45957788825035095,
      "step": 1183
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1272321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2712.0,
      "completions/mean_length": 1075.77685546875,
      "completions/mean_terminated_length": 635.4884643554688,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 11.06530612244898,
      "grad_norm": 0.16120658814907074,
      "learning_rate": 1e-06,
      "loss": -0.0507,
      "num_tokens": 695064015.0,
      "reward": 0.6595982313156128,
      "reward_std": 0.1583426594734192,
      "rewards/verify_math_reward/mean": 0.6595982313156128,
      "rewards/verify_math_reward/std": 0.4741089344024658,
      "step": 1184
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1462053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2909.0,
      "completions/mean_length": 1113.318115234375,
      "completions/mean_terminated_length": 602.5581665039062,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 11.07463556851312,
      "grad_norm": 0.17223554849624634,
      "learning_rate": 1e-06,
      "loss": -0.1264,
      "num_tokens": 695614524.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.16660960018634796,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 1185
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1618303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3482.0,
      "completions/mean_length": 1174.4241943359375,
      "completions/mean_terminated_length": 610.3381958007812,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 11.08396501457726,
      "grad_norm": 0.15575699508190155,
      "learning_rate": 1e-06,
      "loss": -0.0596,
      "num_tokens": 696167816.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.11054850369691849,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 1186
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2536.0,
      "completions/mean_length": 978.62841796875,
      "completions/mean_terminated_length": 560.3480834960938,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 11.093294460641399,
      "grad_norm": 0.1487502008676529,
      "learning_rate": 1e-06,
      "loss": -0.0602,
      "num_tokens": 696703563.0,
      "reward": 0.684151828289032,
      "reward_std": 0.13256961107254028,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 1187
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3119.0,
      "completions/mean_length": 1066.2098388671875,
      "completions/mean_terminated_length": 628.9603881835938,
      "completions/min_length": 97.0,
      "completions/min_terminated_length": 97.0,
      "epoch": 11.102623906705539,
      "grad_norm": 0.1588362604379654,
      "learning_rate": 1e-06,
      "loss": -0.0894,
      "num_tokens": 697296047.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.1698404997587204,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111123085022,
      "step": 1188
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4086.0,
      "completions/mean_length": 950.0469360351562,
      "completions/mean_terminated_length": 541.4299926757812,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 11.11195335276968,
      "grad_norm": 0.15742437541484833,
      "learning_rate": 1e-06,
      "loss": -0.0566,
      "num_tokens": 697800745.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.11498401314020157,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613664388656616,
      "step": 1189
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1997767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3760.0,
      "completions/mean_length": 1293.1741943359375,
      "completions/mean_terminated_length": 593.4448852539062,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 11.12128279883382,
      "grad_norm": 0.14927829802036285,
      "learning_rate": 1e-06,
      "loss": -0.0831,
      "num_tokens": 698321021.0,
      "reward": 0.640625,
      "reward_std": 0.11742536723613739,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 1190
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3805.0,
      "completions/mean_length": 994.3973388671875,
      "completions/mean_terminated_length": 551.3112182617188,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 11.130612244897959,
      "grad_norm": 0.12305353581905365,
      "learning_rate": 1e-06,
      "loss": -0.0703,
      "num_tokens": 698830321.0,
      "reward": 0.6975446939468384,
      "reward_std": 0.0839071124792099,
      "rewards/verify_math_reward/mean": 0.6975446343421936,
      "rewards/verify_math_reward/std": 0.45957788825035095,
      "step": 1191
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1662946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2690.0,
      "completions/mean_length": 1222.7801513671875,
      "completions/mean_terminated_length": 649.6746826171875,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 11.139941690962099,
      "grad_norm": 0.1583167016506195,
      "learning_rate": 1e-06,
      "loss": -0.052,
      "num_tokens": 699418660.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.13658326864242554,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 1192
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2730.0,
      "completions/mean_length": 1045.46875,
      "completions/mean_terminated_length": 587.3016967773438,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 11.14927113702624,
      "grad_norm": 0.1539103090763092,
      "learning_rate": 1e-06,
      "loss": -0.0591,
      "num_tokens": 699960192.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.13534656167030334,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 1193
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3318.0,
      "completions/mean_length": 1040.3426513671875,
      "completions/mean_terminated_length": 590.4058837890625,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 11.15860058309038,
      "grad_norm": 0.26502928137779236,
      "learning_rate": 1e-06,
      "loss": -0.0305,
      "num_tokens": 700505795.0,
      "reward": 0.7332589626312256,
      "reward_std": 0.12084423005580902,
      "rewards/verify_math_reward/mean": 0.7332589030265808,
      "rewards/verify_math_reward/std": 0.4425029158592224,
      "step": 1194
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1450892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3942.0,
      "completions/mean_length": 1070.2757568359375,
      "completions/mean_terminated_length": 556.7715454101562,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 11.167930029154519,
      "grad_norm": 0.14310236275196075,
      "learning_rate": 1e-06,
      "loss": -0.0797,
      "num_tokens": 701034250.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.12467243522405624,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 1195
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1674107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3851.0,
      "completions/mean_length": 1199.85498046875,
      "completions/mean_terminated_length": 617.5201416015625,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 11.177259475218658,
      "grad_norm": 0.16276240348815918,
      "learning_rate": 1e-06,
      "loss": -0.1077,
      "num_tokens": 701573664.0,
      "reward": 0.6796875596046448,
      "reward_std": 0.16055506467819214,
      "rewards/verify_math_reward/mean": 0.6796875,
      "rewards/verify_math_reward/std": 0.4668572247028351,
      "step": 1196
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3972.0,
      "completions/mean_length": 922.1563110351562,
      "completions/mean_terminated_length": 541.2949829101562,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 11.186588921282798,
      "grad_norm": 0.1686992347240448,
      "learning_rate": 1e-06,
      "loss": -0.0853,
      "num_tokens": 702093356.0,
      "reward": 0.7042410969734192,
      "reward_std": 0.1265924572944641,
      "rewards/verify_math_reward/mean": 0.7042410969734192,
      "rewards/verify_math_reward/std": 0.45663803815841675,
      "step": 1197
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3806.0,
      "completions/mean_length": 1037.734375,
      "completions/mean_terminated_length": 600.8392944335938,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 11.19591836734694,
      "grad_norm": 0.13966944813728333,
      "learning_rate": 1e-06,
      "loss": -0.054,
      "num_tokens": 702648830.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.13940481841564178,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 1198
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1573660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2947.0,
      "completions/mean_length": 1110.55810546875,
      "completions/mean_terminated_length": 553.0119018554688,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 11.205247813411079,
      "grad_norm": 0.14603488147258759,
      "learning_rate": 1e-06,
      "loss": -0.0722,
      "num_tokens": 703162066.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.11460494995117188,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900800228119,
      "step": 1199
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2967.0,
      "completions/mean_length": 1005.5547485351562,
      "completions/mean_terminated_length": 590.8873291015625,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 11.214577259475218,
      "grad_norm": 0.3345600962638855,
      "learning_rate": 1e-06,
      "loss": -0.0945,
      "num_tokens": 703724075.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.1744275689125061,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 1200
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4008.0,
      "completions/mean_length": 1078.5648193359375,
      "completions/mean_terminated_length": 611.9509887695312,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 11.223906705539358,
      "grad_norm": 0.2436329424381256,
      "learning_rate": 1e-06,
      "loss": -0.0624,
      "num_tokens": 704281645.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.1331976056098938,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 1201
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2771.0,
      "completions/mean_length": 1040.2578125,
      "completions/mean_terminated_length": 608.1719970703125,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 11.2332361516035,
      "grad_norm": 0.14937366545200348,
      "learning_rate": 1e-06,
      "loss": -0.0556,
      "num_tokens": 704846148.0,
      "reward": 0.7098214626312256,
      "reward_std": 0.09528662264347076,
      "rewards/verify_math_reward/mean": 0.7098214030265808,
      "rewards/verify_math_reward/std": 0.454098105430603,
      "step": 1202
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3325.0,
      "completions/mean_length": 1042.2545166015625,
      "completions/mean_terminated_length": 551.7564697265625,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 11.242565597667639,
      "grad_norm": 0.14984160661697388,
      "learning_rate": 1e-06,
      "loss": -0.0645,
      "num_tokens": 705358536.0,
      "reward": 0.7332589626312256,
      "reward_std": 0.11201275140047073,
      "rewards/verify_math_reward/mean": 0.7332589030265808,
      "rewards/verify_math_reward/std": 0.4425029158592224,
      "step": 1203
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3981.0,
      "completions/mean_length": 975.1629638671875,
      "completions/mean_terminated_length": 583.0979614257812,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 11.251895043731778,
      "grad_norm": 0.15446537733078003,
      "learning_rate": 1e-06,
      "loss": -0.0677,
      "num_tokens": 705915834.0,
      "reward": 0.738839328289032,
      "reward_std": 0.1284346580505371,
      "rewards/verify_math_reward/mean": 0.7388392686843872,
      "rewards/verify_math_reward/std": 0.439512699842453,
      "step": 1204
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4024.0,
      "completions/mean_length": 1101.9832763671875,
      "completions/mean_terminated_length": 652.3042602539062,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 11.261224489795918,
      "grad_norm": 0.177810400724411,
      "learning_rate": 1e-06,
      "loss": -0.0713,
      "num_tokens": 706533539.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.14481674134731293,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.4884119927883148,
      "step": 1205
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2734.0,
      "completions/mean_length": 907.114990234375,
      "completions/mean_terminated_length": 542.2176513671875,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 11.270553935860057,
      "grad_norm": 0.17740251123905182,
      "learning_rate": 1e-06,
      "loss": -0.0433,
      "num_tokens": 707060418.0,
      "reward": 0.684151828289032,
      "reward_std": 0.1470380276441574,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 1206
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3246.0,
      "completions/mean_length": 1018.427490234375,
      "completions/mean_terminated_length": 583.2547607421875,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 11.279883381924199,
      "grad_norm": 0.161585733294487,
      "learning_rate": 1e-06,
      "loss": -0.0692,
      "num_tokens": 707604401.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.16239726543426514,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 1207
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3945.0,
      "completions/mean_length": 972.3750610351562,
      "completions/mean_terminated_length": 566.6582641601562,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 11.289212827988338,
      "grad_norm": 0.14691539108753204,
      "learning_rate": 1e-06,
      "loss": -0.0569,
      "num_tokens": 708146729.0,
      "reward": 0.668526828289032,
      "reward_std": 0.12181740999221802,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056781768799,
      "step": 1208
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2608.0,
      "completions/mean_length": 923.67529296875,
      "completions/mean_terminated_length": 556.2702026367188,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 11.298542274052478,
      "grad_norm": 0.15742585062980652,
      "learning_rate": 1e-06,
      "loss": -0.0696,
      "num_tokens": 708681102.0,
      "reward": 0.691964328289032,
      "reward_std": 0.12043306231498718,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 1209
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3961.0,
      "completions/mean_length": 906.2366333007812,
      "completions/mean_terminated_length": 550.0595703125,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 11.307871720116617,
      "grad_norm": 0.16292087733745575,
      "learning_rate": 1e-06,
      "loss": -0.049,
      "num_tokens": 709203706.0,
      "reward": 0.7421875596046448,
      "reward_std": 0.10990910232067108,
      "rewards/verify_math_reward/mean": 0.7421875,
      "rewards/verify_math_reward/std": 0.43767455220222473,
      "step": 1210
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1841517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2962.0,
      "completions/mean_length": 1280.727783203125,
      "completions/mean_terminated_length": 645.26953125,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 11.317201166180759,
      "grad_norm": 0.1581532210111618,
      "learning_rate": 1e-06,
      "loss": -0.0836,
      "num_tokens": 709775614.0,
      "reward": 0.629464328289032,
      "reward_std": 0.13444501161575317,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 1211
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3938.0,
      "completions/mean_length": 1014.2578735351562,
      "completions/mean_terminated_length": 560.4801635742188,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 11.326530612244898,
      "grad_norm": 0.15922218561172485,
      "learning_rate": 1e-06,
      "loss": -0.0763,
      "num_tokens": 710299853.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.13616888225078583,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1212
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2643.0,
      "completions/mean_length": 932.7578735351562,
      "completions/mean_terminated_length": 553.1687622070312,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 11.335860058309038,
      "grad_norm": 0.1400115191936493,
      "learning_rate": 1e-06,
      "loss": -0.0186,
      "num_tokens": 710819716.0,
      "reward": 0.7243303656578064,
      "reward_std": 0.10092677175998688,
      "rewards/verify_math_reward/mean": 0.7243303656578064,
      "rewards/verify_math_reward/std": 0.4471006691455841,
      "step": 1213
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1450892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3967.0,
      "completions/mean_length": 1073.798095703125,
      "completions/mean_terminated_length": 560.8916625976562,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 11.345189504373177,
      "grad_norm": 0.17650899291038513,
      "learning_rate": 1e-06,
      "loss": -0.0366,
      "num_tokens": 711337575.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.12009353935718536,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 1214
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2247.0,
      "completions/mean_length": 1052.0670166015625,
      "completions/mean_terminated_length": 630.4802856445312,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 11.354518950437317,
      "grad_norm": 0.15283583104610443,
      "learning_rate": 1e-06,
      "loss": -0.0726,
      "num_tokens": 711928219.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.1345216929912567,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613667368888855,
      "step": 1215
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1395089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2247.0,
      "completions/mean_length": 1090.2254638671875,
      "completions/mean_terminated_length": 602.9078979492188,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 11.363848396501458,
      "grad_norm": 0.15655484795570374,
      "learning_rate": 1e-06,
      "loss": -0.0574,
      "num_tokens": 712474885.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.11960610747337341,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485536336898804,
      "step": 1216
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4002.0,
      "completions/mean_length": 1033.4888916015625,
      "completions/mean_terminated_length": 595.9872436523438,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 11.373177842565598,
      "grad_norm": 0.1904921531677246,
      "learning_rate": 1e-06,
      "loss": -0.0602,
      "num_tokens": 713042387.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.17964698374271393,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1217
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2593.0,
      "completions/mean_length": 1093.075927734375,
      "completions/mean_terminated_length": 601.6882934570312,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 11.382507288629737,
      "grad_norm": 0.14345112442970276,
      "learning_rate": 1e-06,
      "loss": -0.0723,
      "num_tokens": 713600703.0,
      "reward": 0.7131696939468384,
      "reward_std": 0.11283759027719498,
      "rewards/verify_math_reward/mean": 0.7131696343421936,
      "rewards/verify_math_reward/std": 0.4525342881679535,
      "step": 1218
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4055.0,
      "completions/mean_length": 959.4654541015625,
      "completions/mean_terminated_length": 574.2769165039062,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 11.391836734693877,
      "grad_norm": 0.164277583360672,
      "learning_rate": 1e-06,
      "loss": -0.0468,
      "num_tokens": 714149000.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.14120283722877502,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422141790390015,
      "step": 1219
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3164.0,
      "completions/mean_length": 966.4576416015625,
      "completions/mean_terminated_length": 595.2883911132812,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 11.401166180758018,
      "grad_norm": 0.19730933010578156,
      "learning_rate": 1e-06,
      "loss": -0.0436,
      "num_tokens": 714713730.0,
      "reward": 0.6975446939468384,
      "reward_std": 0.14147524535655975,
      "rewards/verify_math_reward/mean": 0.6975446343421936,
      "rewards/verify_math_reward/std": 0.45957788825035095,
      "step": 1220
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1506696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3959.0,
      "completions/mean_length": 1147.0670166015625,
      "completions/mean_terminated_length": 623.931640625,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 11.410495626822158,
      "grad_norm": 0.1630871444940567,
      "learning_rate": 1e-06,
      "loss": -0.0694,
      "num_tokens": 715270942.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.14582450687885284,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 1221
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3800.0,
      "completions/mean_length": 1077.9442138671875,
      "completions/mean_terminated_length": 624.6547241210938,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 11.419825072886297,
      "grad_norm": 0.16397812962532043,
      "learning_rate": 1e-06,
      "loss": -0.0779,
      "num_tokens": 715844940.0,
      "reward": 0.7142857313156128,
      "reward_std": 0.14751701056957245,
      "rewards/verify_math_reward/mean": 0.7142857313156128,
      "rewards/verify_math_reward/std": 0.4520062506198883,
      "step": 1222
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4056.0,
      "completions/mean_length": 1096.1060791015625,
      "completions/mean_terminated_length": 693.588623046875,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 11.429154518950437,
      "grad_norm": 0.1463952511548996,
      "learning_rate": 1e-06,
      "loss": -0.0609,
      "num_tokens": 716472979.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.1424841582775116,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 1223
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3269.0,
      "completions/mean_length": 945.107177734375,
      "completions/mean_terminated_length": 549.2662963867188,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 11.438483965014576,
      "grad_norm": 0.1774315983057022,
      "learning_rate": 1e-06,
      "loss": -0.0833,
      "num_tokens": 716994339.0,
      "reward": 0.723214328289032,
      "reward_std": 0.13444501161575317,
      "rewards/verify_math_reward/mean": 0.7232142686843872,
      "rewards/verify_math_reward/std": 0.44765952229499817,
      "step": 1224
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1741071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2899.0,
      "completions/mean_length": 1220.46435546875,
      "completions/mean_terminated_length": 614.270263671875,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 11.447813411078718,
      "grad_norm": 0.14416812360286713,
      "learning_rate": 1e-06,
      "loss": -0.052,
      "num_tokens": 717540859.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.12058139592409134,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 1225
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3193.0,
      "completions/mean_length": 1058.6004638671875,
      "completions/mean_terminated_length": 584.3742065429688,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 11.457142857142857,
      "grad_norm": 0.1534576416015625,
      "learning_rate": 1e-06,
      "loss": -0.0851,
      "num_tokens": 718070165.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.15503577888011932,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422144770622253,
      "step": 1226
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3743.0,
      "completions/mean_length": 996.9063110351562,
      "completions/mean_terminated_length": 563.1908569335938,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 11.466472303206997,
      "grad_norm": 0.15613774955272675,
      "learning_rate": 1e-06,
      "loss": -0.0739,
      "num_tokens": 718594841.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.10713215172290802,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.4610042870044708,
      "step": 1227
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1506696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3383.0,
      "completions/mean_length": 1134.29248046875,
      "completions/mean_terminated_length": 608.8909301757812,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 11.475801749271136,
      "grad_norm": 0.18411079049110413,
      "learning_rate": 1e-06,
      "loss": -0.0962,
      "num_tokens": 719142807.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.1811119168996811,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 1228
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1618303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3980.0,
      "completions/mean_length": 1106.0045166015625,
      "completions/mean_terminated_length": 528.7083740234375,
      "completions/min_length": 177.0,
      "completions/min_terminated_length": 177.0,
      "epoch": 11.485131195335278,
      "grad_norm": 0.17371106147766113,
      "learning_rate": 1e-06,
      "loss": -0.0519,
      "num_tokens": 719611939.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.1385032683610916,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 1229
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3494.0,
      "completions/mean_length": 1053.766845703125,
      "completions/mean_terminated_length": 636.8109130859375,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 11.494460641399417,
      "grad_norm": 0.16804476082324982,
      "learning_rate": 1e-06,
      "loss": -0.0669,
      "num_tokens": 720205602.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.18280190229415894,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.4628615975379944,
      "step": 1230
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1595982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3170.0,
      "completions/mean_length": 1140.96435546875,
      "completions/mean_terminated_length": 579.7822265625,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 11.503790087463557,
      "grad_norm": 0.18616646528244019,
      "learning_rate": 1e-06,
      "loss": -0.0912,
      "num_tokens": 720735442.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.1310625821352005,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219160199165344,
      "step": 1231
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2625.0,
      "completions/mean_length": 938.8114013671875,
      "completions/mean_terminated_length": 594.9591674804688,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 11.513119533527696,
      "grad_norm": 0.17584441602230072,
      "learning_rate": 1e-06,
      "loss": -0.0665,
      "num_tokens": 721307465.0,
      "reward": 0.645089328289032,
      "reward_std": 0.146052747964859,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 1232
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2501.0,
      "completions/mean_length": 1074.7020263671875,
      "completions/mean_terminated_length": 580.3078002929688,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 11.522448979591836,
      "grad_norm": 0.15414857864379883,
      "learning_rate": 1e-06,
      "loss": -0.084,
      "num_tokens": 721836742.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.1335773915052414,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 1233
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3040.0,
      "completions/mean_length": 1039.44873046875,
      "completions/mean_terminated_length": 642.4439086914062,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 11.531778425655977,
      "grad_norm": 0.16479934751987457,
      "learning_rate": 1e-06,
      "loss": -0.097,
      "num_tokens": 722437680.0,
      "reward": 0.691964328289032,
      "reward_std": 0.17735788226127625,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 1234
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.15625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2586.0,
      "completions/mean_length": 1177.0145263671875,
      "completions/mean_terminated_length": 636.4616088867188,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 11.541107871720117,
      "grad_norm": 0.15359684824943542,
      "learning_rate": 1e-06,
      "loss": -0.0975,
      "num_tokens": 723006973.0,
      "reward": 0.645089328289032,
      "reward_std": 0.13865482807159424,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 1235
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3926.0,
      "completions/mean_length": 1101.485595703125,
      "completions/mean_terminated_length": 620.5012817382812,
      "completions/min_length": 188.0,
      "completions/min_terminated_length": 188.0,
      "epoch": 11.550437317784256,
      "grad_norm": 0.16850945353507996,
      "learning_rate": 1e-06,
      "loss": -0.0861,
      "num_tokens": 723570232.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.16469958424568176,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 1236
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1584821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3914.0,
      "completions/mean_length": 1160.8326416015625,
      "completions/mean_terminated_length": 608.0556640625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 11.559766763848396,
      "grad_norm": 0.1628613919019699,
      "learning_rate": 1e-06,
      "loss": -0.0777,
      "num_tokens": 724133754.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.1304224729537964,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179922461509705,
      "step": 1237
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1506696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4018.0,
      "completions/mean_length": 1143.20654296875,
      "completions/mean_terminated_length": 619.3862915039062,
      "completions/min_length": 181.0,
      "completions/min_terminated_length": 181.0,
      "epoch": 11.569096209912537,
      "grad_norm": 0.15765686333179474,
      "learning_rate": 1e-06,
      "loss": -0.0853,
      "num_tokens": 724693307.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.14766854047775269,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 1238
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2472.0,
      "completions/mean_length": 948.685302734375,
      "completions/mean_terminated_length": 588.5447387695312,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 11.578425655976677,
      "grad_norm": 0.15406832098960876,
      "learning_rate": 1e-06,
      "loss": -0.0617,
      "num_tokens": 725253945.0,
      "reward": 0.7332589626312256,
      "reward_std": 0.13970790803432465,
      "rewards/verify_math_reward/mean": 0.7332589030265808,
      "rewards/verify_math_reward/std": 0.4425028860569,
      "step": 1239
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1484375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3812.0,
      "completions/mean_length": 1106.3616943359375,
      "completions/mean_terminated_length": 585.2319946289062,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 11.587755102040816,
      "grad_norm": 0.14483070373535156,
      "learning_rate": 1e-06,
      "loss": -0.0653,
      "num_tokens": 725794829.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.09848611801862717,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 1240
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1595982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4068.0,
      "completions/mean_length": 1157.888427734375,
      "completions/mean_terminated_length": 599.9203491210938,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 11.597084548104956,
      "grad_norm": 0.18773047626018524,
      "learning_rate": 1e-06,
      "loss": -0.065,
      "num_tokens": 726351737.0,
      "reward": 0.6328125,
      "reward_std": 0.15300628542900085,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 1241
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4006.0,
      "completions/mean_length": 1102.5546875,
      "completions/mean_terminated_length": 608.1885375976562,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 11.606413994169095,
      "grad_norm": 0.16885830461978912,
      "learning_rate": 1e-06,
      "loss": -0.0714,
      "num_tokens": 726921914.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.1453377902507782,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 1242
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3845.0,
      "completions/mean_length": 1059.421875,
      "completions/mean_terminated_length": 651.9822998046875,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 11.615743440233237,
      "grad_norm": 0.14501583576202393,
      "learning_rate": 1e-06,
      "loss": -0.0432,
      "num_tokens": 727525540.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.11561454832553864,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 1243
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3266.0,
      "completions/mean_length": 1040.430908203125,
      "completions/mean_terminated_length": 599.4610595703125,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 11.625072886297376,
      "grad_norm": 0.1352197825908661,
      "learning_rate": 1e-06,
      "loss": -0.0927,
      "num_tokens": 728083126.0,
      "reward": 0.7611607313156128,
      "reward_std": 0.1000591367483139,
      "rewards/verify_math_reward/mean": 0.7611607313156128,
      "rewards/verify_math_reward/std": 0.4266124963760376,
      "step": 1244
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3022.0,
      "completions/mean_length": 1103.1328125,
      "completions/mean_terminated_length": 608.8621826171875,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 11.634402332361516,
      "grad_norm": 0.16308481991291046,
      "learning_rate": 1e-06,
      "loss": -0.0698,
      "num_tokens": 728639013.0,
      "reward": 0.660714328289032,
      "reward_std": 0.1360626220703125,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 1245
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1428571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3524.0,
      "completions/mean_length": 1109.404052734375,
      "completions/mean_terminated_length": 611.6380615234375,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 11.643731778425655,
      "grad_norm": 0.1664920151233673,
      "learning_rate": 1e-06,
      "loss": -0.0631,
      "num_tokens": 729197095.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.14165747165679932,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 1246
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1372767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3043.0,
      "completions/mean_length": 1073.8046875,
      "completions/mean_terminated_length": 592.9120483398438,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 11.653061224489797,
      "grad_norm": 0.1885533183813095,
      "learning_rate": 1e-06,
      "loss": -0.0947,
      "num_tokens": 729744600.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.1555236279964447,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1247
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1506696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2721.0,
      "completions/mean_length": 1117.083740234375,
      "completions/mean_terminated_length": 588.62939453125,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 11.662390670553936,
      "grad_norm": 0.18340948224067688,
      "learning_rate": 1e-06,
      "loss": -0.0962,
      "num_tokens": 730272659.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.15856975317001343,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1248
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1495535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2767.0,
      "completions/mean_length": 1137.3817138671875,
      "completions/mean_terminated_length": 617.0997314453125,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 11.671720116618076,
      "grad_norm": 0.16311952471733093,
      "learning_rate": 1e-06,
      "loss": -0.0612,
      "num_tokens": 730827473.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.14995764195919037,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 1249
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1473214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3126.0,
      "completions/mean_length": 1131.6351318359375,
      "completions/mean_terminated_length": 619.46728515625,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 11.681049562682215,
      "grad_norm": 0.17689785361289978,
      "learning_rate": 1e-06,
      "loss": -0.0791,
      "num_tokens": 731392418.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.15233227610588074,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 1250
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.15625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3951.0,
      "completions/mean_length": 1203.5703125,
      "completions/mean_terminated_length": 667.9351806640625,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 11.690379008746355,
      "grad_norm": 0.18204478919506073,
      "learning_rate": 1e-06,
      "loss": -0.0769,
      "num_tokens": 731989369.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.1702541708946228,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 1251
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3308.0,
      "completions/mean_length": 1070.3382568359375,
      "completions/mean_terminated_length": 620.3679809570312,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 11.699708454810496,
      "grad_norm": 0.14226645231246948,
      "learning_rate": 1e-06,
      "loss": -0.0557,
      "num_tokens": 732569696.0,
      "reward": 0.6819196939468384,
      "reward_std": 0.11768680810928345,
      "rewards/verify_math_reward/mean": 0.6819196343421936,
      "rewards/verify_math_reward/std": 0.46599099040031433,
      "step": 1252
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1640625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3946.0,
      "completions/mean_length": 1205.2879638671875,
      "completions/mean_terminated_length": 637.9519653320312,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 11.709037900874636,
      "grad_norm": 0.12835073471069336,
      "learning_rate": 1e-06,
      "loss": -0.1102,
      "num_tokens": 733134450.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.11629742383956909,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.4600566029548645,
      "step": 1253
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1517857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3644.0,
      "completions/mean_length": 1139.524658203125,
      "completions/mean_terminated_length": 610.4710693359375,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 11.718367346938775,
      "grad_norm": 0.1552983522415161,
      "learning_rate": 1e-06,
      "loss": -0.0867,
      "num_tokens": 733688000.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.13516110181808472,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219160199165344,
      "step": 1254
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.15625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3595.0,
      "completions/mean_length": 1179.3951416015625,
      "completions/mean_terminated_length": 639.2830200195312,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 11.727696793002915,
      "grad_norm": 0.18033091723918915,
      "learning_rate": 1e-06,
      "loss": -0.0972,
      "num_tokens": 734266410.0,
      "reward": 0.676339328289032,
      "reward_std": 0.16638068854808807,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335985660553,
      "step": 1255
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3076.0,
      "completions/mean_length": 1009.32373046875,
      "completions/mean_terminated_length": 604.0025024414062,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 11.737026239067056,
      "grad_norm": 0.17618383467197418,
      "learning_rate": 1e-06,
      "loss": -0.0647,
      "num_tokens": 734836812.0,
      "reward": 0.7087053656578064,
      "reward_std": 0.13726656138896942,
      "rewards/verify_math_reward/mean": 0.7087053656578064,
      "rewards/verify_math_reward/std": 0.45461276173591614,
      "step": 1256
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3984.0,
      "completions/mean_length": 1138.4609375,
      "completions/mean_terminated_length": 663.415771484375,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 11.746355685131196,
      "grad_norm": 0.14334562420845032,
      "learning_rate": 1e-06,
      "loss": -0.0623,
      "num_tokens": 735439817.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.12181992828845978,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 1257
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1696428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3802.0,
      "completions/mean_length": 1200.634033203125,
      "completions/mean_terminated_length": 609.1075439453125,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 11.755685131195335,
      "grad_norm": 0.19604165852069855,
      "learning_rate": 1e-06,
      "loss": -0.083,
      "num_tokens": 735987929.0,
      "reward": 0.609375,
      "reward_std": 0.16491642594337463,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 1258
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1573660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3705.0,
      "completions/mean_length": 1178.26123046875,
      "completions/mean_terminated_length": 633.3589477539062,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 11.765014577259475,
      "grad_norm": 0.1749848872423172,
      "learning_rate": 1e-06,
      "loss": -0.0935,
      "num_tokens": 736557547.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.1557818502187729,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 1259
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3408.0,
      "completions/mean_length": 980.2098388671875,
      "completions/mean_terminated_length": 571.065673828125,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 11.774344023323614,
      "grad_norm": 0.14527100324630737,
      "learning_rate": 1e-06,
      "loss": -0.0472,
      "num_tokens": 737094071.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.10344410687685013,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 1260
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3577.0,
      "completions/mean_length": 965.427490234375,
      "completions/mean_terminated_length": 527.306640625,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 11.783673469387756,
      "grad_norm": 0.18137522041797638,
      "learning_rate": 1e-06,
      "loss": -0.1077,
      "num_tokens": 737591590.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.16675932705402374,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 1261
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1439732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4006.0,
      "completions/mean_length": 1069.5592041015625,
      "completions/mean_terminated_length": 560.5488891601562,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 11.793002915451895,
      "grad_norm": 0.17344211041927338,
      "learning_rate": 1e-06,
      "loss": -0.0853,
      "num_tokens": 738107835.0,
      "reward": 0.7165178656578064,
      "reward_std": 0.13185282051563263,
      "rewards/verify_math_reward/mean": 0.7165178656578064,
      "rewards/verify_math_reward/std": 0.4509401023387909,
      "step": 1262
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1629464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3313.0,
      "completions/mean_length": 1221.5926513671875,
      "completions/mean_terminated_length": 662.0413208007812,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 11.802332361516035,
      "grad_norm": 0.13384099304676056,
      "learning_rate": 1e-06,
      "loss": -0.102,
      "num_tokens": 738690086.0,
      "reward": 0.668526828289032,
      "reward_std": 0.12971526384353638,
      "rewards/verify_math_reward/mean": 0.6685267686843872,
      "rewards/verify_math_reward/std": 0.4710056483745575,
      "step": 1263
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1395089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3739.0,
      "completions/mean_length": 1090.3013916015625,
      "completions/mean_terminated_length": 602.99609375,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 11.811661807580174,
      "grad_norm": 0.16794444620609283,
      "learning_rate": 1e-06,
      "loss": -0.1056,
      "num_tokens": 739242076.0,
      "reward": 0.7299107313156128,
      "reward_std": 0.14489160478115082,
      "rewards/verify_math_reward/mean": 0.7299107313156128,
      "rewards/verify_math_reward/std": 0.44425368309020996,
      "step": 1264
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1785714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3056.0,
      "completions/mean_length": 1239.03125,
      "completions/mean_terminated_length": 617.9511108398438,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 11.820991253644316,
      "grad_norm": 0.16162048280239105,
      "learning_rate": 1e-06,
      "loss": -0.1131,
      "num_tokens": 739785552.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.14728990197181702,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 1265
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3972.0,
      "completions/mean_length": 1130.3013916015625,
      "completions/mean_terminated_length": 667.2697143554688,
      "completions/min_length": 193.0,
      "completions/min_terminated_length": 193.0,
      "epoch": 11.830320699708455,
      "grad_norm": 0.1715167611837387,
      "learning_rate": 1e-06,
      "loss": -0.0913,
      "num_tokens": 740385726.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.15623538196086884,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.48468026518821716,
      "step": 1266
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3002.0,
      "completions/mean_length": 1083.3270263671875,
      "completions/mean_terminated_length": 608.4612426757812,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 11.839650145772595,
      "grad_norm": 0.14971394836902618,
      "learning_rate": 1e-06,
      "loss": -0.0644,
      "num_tokens": 740949067.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.10382387042045593,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.46642565727233887,
      "step": 1267
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2904.0,
      "completions/mean_length": 1000.9598388671875,
      "completions/mean_terminated_length": 590.1138305664062,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 11.848979591836734,
      "grad_norm": 0.1726934313774109,
      "learning_rate": 1e-06,
      "loss": -0.0681,
      "num_tokens": 741518895.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.14713652431964874,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 1268
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2939.0,
      "completions/mean_length": 980.0535888671875,
      "completions/mean_terminated_length": 575.3341674804688,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 11.858309037900874,
      "grad_norm": 0.1491033136844635,
      "learning_rate": 1e-06,
      "loss": -0.0598,
      "num_tokens": 742063103.0,
      "reward": 0.6796875596046448,
      "reward_std": 0.14432819187641144,
      "rewards/verify_math_reward/mean": 0.6796875,
      "rewards/verify_math_reward/std": 0.4668572247028351,
      "step": 1269
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3839.0,
      "completions/mean_length": 1025.4788818359375,
      "completions/mean_terminated_length": 617.8875122070312,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 11.867638483965015,
      "grad_norm": 0.1738765686750412,
      "learning_rate": 1e-06,
      "loss": -0.068,
      "num_tokens": 742643388.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.16352704167366028,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 1270
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1272321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2302.0,
      "completions/mean_length": 1015.8248291015625,
      "completions/mean_terminated_length": 566.7966918945312,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 11.876967930029155,
      "grad_norm": 0.18321920931339264,
      "learning_rate": 1e-06,
      "loss": -0.0846,
      "num_tokens": 743183023.0,
      "reward": 0.691964328289032,
      "reward_std": 0.15349414944648743,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 1271
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3067.0,
      "completions/mean_length": 1014.185302734375,
      "completions/mean_terminated_length": 622.6591186523438,
      "completions/min_length": 190.0,
      "completions/min_terminated_length": 190.0,
      "epoch": 11.886297376093294,
      "grad_norm": 0.16701650619506836,
      "learning_rate": 1e-06,
      "loss": -0.0693,
      "num_tokens": 743773133.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.16330133378505707,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 1272
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1707589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4091.0,
      "completions/mean_length": 1210.6116943359375,
      "completions/mean_terminated_length": 616.4468383789062,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 11.895626822157434,
      "grad_norm": 0.1822567880153656,
      "learning_rate": 1e-06,
      "loss": -0.0881,
      "num_tokens": 744319833.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.143612802028656,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 1273
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3199.0,
      "completions/mean_length": 1136.5703125,
      "completions/mean_terminated_length": 647.8218383789062,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 11.904956268221575,
      "grad_norm": 0.22753936052322388,
      "learning_rate": 1e-06,
      "loss": -0.0717,
      "num_tokens": 744907336.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.15785619616508484,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 1274
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1573660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3380.0,
      "completions/mean_length": 1187.5145263671875,
      "completions/mean_terminated_length": 644.3403930664062,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 11.914285714285715,
      "grad_norm": 0.165482297539711,
      "learning_rate": 1e-06,
      "loss": -0.0867,
      "num_tokens": 745484989.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.15518732368946075,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 1275
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4050.0,
      "completions/mean_length": 1082.474365234375,
      "completions/mean_terminated_length": 607.4741821289062,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 11.923615160349854,
      "grad_norm": 0.17899391055107117,
      "learning_rate": 1e-06,
      "loss": -0.098,
      "num_tokens": 746046854.0,
      "reward": 0.640625,
      "reward_std": 0.15857228636741638,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 1276
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3311.0,
      "completions/mean_length": 945.3449096679688,
      "completions/mean_terminated_length": 615.1282348632812,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 11.932944606413994,
      "grad_norm": 0.14702017605304718,
      "learning_rate": 1e-06,
      "loss": -0.0662,
      "num_tokens": 746636195.0,
      "reward": 0.7120535969734192,
      "reward_std": 0.1410936564207077,
      "rewards/verify_math_reward/mean": 0.7120535969734192,
      "rewards/verify_math_reward/std": 0.4530589282512665,
      "step": 1277
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1484375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3058.0,
      "completions/mean_length": 1152.7890625,
      "completions/mean_terminated_length": 639.7523193359375,
      "completions/min_length": 183.0,
      "completions/min_terminated_length": 183.0,
      "epoch": 11.942274052478133,
      "grad_norm": 0.1499701738357544,
      "learning_rate": 1e-06,
      "loss": -0.105,
      "num_tokens": 747208342.0,
      "reward": 0.625,
      "reward_std": 0.13591037690639496,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1278
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1473214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3220.0,
      "completions/mean_length": 1110.583740234375,
      "completions/mean_terminated_length": 594.77880859375,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 11.951603498542275,
      "grad_norm": 0.17317593097686768,
      "learning_rate": 1e-06,
      "loss": -0.0545,
      "num_tokens": 747744369.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.13726474344730377,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 1279
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3289.0,
      "completions/mean_length": 1026.282470703125,
      "completions/mean_terminated_length": 560.6953735351562,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 11.960932944606414,
      "grad_norm": 0.17023083567619324,
      "learning_rate": 1e-06,
      "loss": -0.0866,
      "num_tokens": 748274374.0,
      "reward": 0.7220982313156128,
      "reward_std": 0.13827574253082275,
      "rewards/verify_math_reward/mean": 0.7220982313156128,
      "rewards/verify_math_reward/std": 0.44821488857269287,
      "step": 1280
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1272321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2443.0,
      "completions/mean_length": 1021.966552734375,
      "completions/mean_terminated_length": 573.833740234375,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 11.970262390670554,
      "grad_norm": 0.18156953155994415,
      "learning_rate": 1e-06,
      "loss": -0.0975,
      "num_tokens": 748815280.0,
      "reward": 0.7220982313156128,
      "reward_std": 0.16209599375724792,
      "rewards/verify_math_reward/mean": 0.7220982313156128,
      "rewards/verify_math_reward/std": 0.44821488857269287,
      "step": 1281
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3055.0,
      "completions/mean_length": 1062.44091796875,
      "completions/mean_terminated_length": 629.0752563476562,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 11.979591836734693,
      "grad_norm": 0.15937910974025726,
      "learning_rate": 1e-06,
      "loss": -0.0678,
      "num_tokens": 749402003.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.14733155071735382,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.46100425720214844,
      "step": 1282
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3242.0,
      "completions/mean_length": 1133.294677734375,
      "completions/mean_terminated_length": 657.419677734375,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 11.988921282798835,
      "grad_norm": 0.1433684229850769,
      "learning_rate": 1e-06,
      "loss": -0.0757,
      "num_tokens": 750002723.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.12741659581661224,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 1283
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.19034090909090906,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2724.0,
      "completions/mean_length": 1256.84375,
      "completions/mean_terminated_length": 589.3930053710938,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 11.998250728862974,
      "grad_norm": 0.17795808613300323,
      "learning_rate": 1e-06,
      "loss": -0.078,
      "num_tokens": 750565713.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.14102061092853546,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 1284
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2969.0,
      "completions/mean_length": 1007.1663208007812,
      "completions/mean_terminated_length": 561.3958740234375,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 12.00932944606414,
      "grad_norm": 0.16885869204998016,
      "learning_rate": 1e-06,
      "loss": -0.0403,
      "num_tokens": 751092782.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.1293700933456421,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 1285
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2520.0,
      "completions/mean_length": 1026.368408203125,
      "completions/mean_terminated_length": 574.3739013671875,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 12.018658892128279,
      "grad_norm": 0.17770910263061523,
      "learning_rate": 1e-06,
      "loss": -0.0745,
      "num_tokens": 751629448.0,
      "reward": 0.699776828289032,
      "reward_std": 0.1338074505329132,
      "rewards/verify_math_reward/mean": 0.6997767686843872,
      "rewards/verify_math_reward/std": 0.4586109220981598,
      "step": 1286
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3398.0,
      "completions/mean_length": 974.47216796875,
      "completions/mean_terminated_length": 577.900634765625,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 12.02798833819242,
      "grad_norm": 0.1872461885213852,
      "learning_rate": 1e-06,
      "loss": -0.045,
      "num_tokens": 752180247.0,
      "reward": 0.7321428656578064,
      "reward_std": 0.1294114738702774,
      "rewards/verify_math_reward/mean": 0.7321428656578064,
      "rewards/verify_math_reward/std": 0.4430900514125824,
      "step": 1287
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3137.0,
      "completions/mean_length": 950.8225708007812,
      "completions/mean_terminated_length": 608.2784423828125,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 12.03731778425656,
      "grad_norm": 0.1401851773262024,
      "learning_rate": 1e-06,
      "loss": -0.0535,
      "num_tokens": 752760056.0,
      "reward": 0.7332589626312256,
      "reward_std": 0.13455308973789215,
      "rewards/verify_math_reward/mean": 0.7332589030265808,
      "rewards/verify_math_reward/std": 0.4425028860569,
      "step": 1288
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3172.0,
      "completions/mean_length": 966.3928833007812,
      "completions/mean_terminated_length": 573.2261352539062,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 12.0466472303207,
      "grad_norm": 0.15536810457706451,
      "learning_rate": 1e-06,
      "loss": -0.064,
      "num_tokens": 753294280.0,
      "reward": 0.7477678656578064,
      "reward_std": 0.1104736328125,
      "rewards/verify_math_reward/mean": 0.7477678656578064,
      "rewards/verify_math_reward/std": 0.4345363676548004,
      "step": 1289
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3675.0,
      "completions/mean_length": 1040.2132568359375,
      "completions/mean_terminated_length": 612.5585327148438,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 12.055976676384839,
      "grad_norm": 0.1755959689617157,
      "learning_rate": 1e-06,
      "loss": -0.0522,
      "num_tokens": 753862343.0,
      "reward": 0.7020089626312256,
      "reward_std": 0.14789748191833496,
      "rewards/verify_math_reward/mean": 0.7020089030265808,
      "rewards/verify_math_reward/std": 0.45763099193573,
      "step": 1290
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3198.0,
      "completions/mean_length": 1001.052490234375,
      "completions/mean_terminated_length": 599.060546875,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 12.06530612244898,
      "grad_norm": 0.1526235193014145,
      "learning_rate": 1e-06,
      "loss": -0.0484,
      "num_tokens": 754428006.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.14406605064868927,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485536336898804,
      "step": 1291
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3308.0,
      "completions/mean_length": 1024.805908203125,
      "completions/mean_terminated_length": 586.0637817382812,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 12.07463556851312,
      "grad_norm": 0.1724972277879715,
      "learning_rate": 1e-06,
      "loss": -0.0757,
      "num_tokens": 754982016.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.14835324883460999,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1292
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2994.0,
      "completions/mean_length": 1059.7020263671875,
      "completions/mean_terminated_length": 634.7748413085938,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 12.08396501457726,
      "grad_norm": 0.1610293835401535,
      "learning_rate": 1e-06,
      "loss": -0.0627,
      "num_tokens": 755571205.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.14702913165092468,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 1293
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4017.0,
      "completions/mean_length": 1100.349365234375,
      "completions/mean_terminated_length": 605.6189575195312,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 12.093294460641399,
      "grad_norm": 0.17970708012580872,
      "learning_rate": 1e-06,
      "loss": -0.0558,
      "num_tokens": 756131374.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.1629229635000229,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 1294
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3686.0,
      "completions/mean_length": 1041.83154296875,
      "completions/mean_terminated_length": 627.6412963867188,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 12.102623906705539,
      "grad_norm": 0.16404816508293152,
      "learning_rate": 1e-06,
      "loss": -0.0829,
      "num_tokens": 756719287.0,
      "reward": 0.6796875596046448,
      "reward_std": 0.1419203132390976,
      "rewards/verify_math_reward/mean": 0.6796875,
      "rewards/verify_math_reward/std": 0.4668572247028351,
      "step": 1295
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2505.0,
      "completions/mean_length": 977.099365234375,
      "completions/mean_terminated_length": 576.4345092773438,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 12.11195335276968,
      "grad_norm": 0.15405990183353424,
      "learning_rate": 1e-06,
      "loss": -0.0458,
      "num_tokens": 757260056.0,
      "reward": 0.699776828289032,
      "reward_std": 0.11765359342098236,
      "rewards/verify_math_reward/mean": 0.6997767686843872,
      "rewards/verify_math_reward/std": 0.4586109220981598,
      "step": 1296
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3015.0,
      "completions/mean_length": 994.7745971679688,
      "completions/mean_terminated_length": 626.9638061523438,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 12.12128279883382,
      "grad_norm": 0.16399167478084564,
      "learning_rate": 1e-06,
      "loss": -0.0757,
      "num_tokens": 757848358.0,
      "reward": 0.7120535969734192,
      "reward_std": 0.15793149173259735,
      "rewards/verify_math_reward/mean": 0.7120535969734192,
      "rewards/verify_math_reward/std": 0.4530589282512665,
      "step": 1297
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1372767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3403.0,
      "completions/mean_length": 1122.6015625,
      "completions/mean_terminated_length": 649.4735107421875,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 12.130612244897959,
      "grad_norm": 0.15027697384357452,
      "learning_rate": 1e-06,
      "loss": -0.0641,
      "num_tokens": 758443937.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.13842841982841492,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 1298
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3964.0,
      "completions/mean_length": 935.35498046875,
      "completions/mean_terminated_length": 591.126220703125,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 12.139941690962099,
      "grad_norm": 0.15242789685726166,
      "learning_rate": 1e-06,
      "loss": -0.0292,
      "num_tokens": 759006887.0,
      "reward": 0.7287946939468384,
      "reward_std": 0.10716353356838226,
      "rewards/verify_math_reward/mean": 0.7287946343421936,
      "rewards/verify_math_reward/std": 0.44483017921447754,
      "step": 1299
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1573660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3537.0,
      "completions/mean_length": 1186.8359375,
      "completions/mean_terminated_length": 643.5350952148438,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 12.14927113702624,
      "grad_norm": 0.17300738394260406,
      "learning_rate": 1e-06,
      "loss": -0.1027,
      "num_tokens": 759592748.0,
      "reward": 0.660714328289032,
      "reward_std": 0.16435259580612183,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 1300
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3942.0,
      "completions/mean_length": 839.6484985351562,
      "completions/mean_terminated_length": 563.6864624023438,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 12.15860058309038,
      "grad_norm": 0.16081883013248444,
      "learning_rate": 1e-06,
      "loss": -0.0505,
      "num_tokens": 760142881.0,
      "reward": 0.7477678656578064,
      "reward_std": 0.11434461921453476,
      "rewards/verify_math_reward/mean": 0.7477678656578064,
      "rewards/verify_math_reward/std": 0.434536337852478,
      "step": 1301
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3316.0,
      "completions/mean_length": 1012.5647583007812,
      "completions/mean_terminated_length": 633.897216796875,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 12.167930029154519,
      "grad_norm": 0.16975706815719604,
      "learning_rate": 1e-06,
      "loss": -0.0453,
      "num_tokens": 760739171.0,
      "reward": 0.6930803656578064,
      "reward_std": 0.15251773595809937,
      "rewards/verify_math_reward/mean": 0.6930803656578064,
      "rewards/verify_math_reward/std": 0.46147334575653076,
      "step": 1302
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3561.0,
      "completions/mean_length": 1040.6663818359375,
      "completions/mean_terminated_length": 563.6400146484375,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 12.177259475218658,
      "grad_norm": 0.1844092309474945,
      "learning_rate": 1e-06,
      "loss": -0.0611,
      "num_tokens": 761258704.0,
      "reward": 0.7209821939468384,
      "reward_std": 0.14628168940544128,
      "rewards/verify_math_reward/mean": 0.7209821343421936,
      "rewards/verify_math_reward/std": 0.448766827583313,
      "step": 1303
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2628.0,
      "completions/mean_length": 914.5982666015625,
      "completions/mean_terminated_length": 546.1419677734375,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 12.186588921282798,
      "grad_norm": 0.1265084594488144,
      "learning_rate": 1e-06,
      "loss": -0.0502,
      "num_tokens": 761776936.0,
      "reward": 0.746651828289032,
      "reward_std": 0.0867268368601799,
      "rewards/verify_math_reward/mean": 0.7466517686843872,
      "rewards/verify_math_reward/std": 0.435171514749527,
      "step": 1304
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2983.0,
      "completions/mean_length": 1044.993408203125,
      "completions/mean_terminated_length": 618.0076293945312,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 12.19591836734694,
      "grad_norm": 0.13551369309425354,
      "learning_rate": 1e-06,
      "loss": -0.0642,
      "num_tokens": 762353386.0,
      "reward": 0.707589328289032,
      "reward_std": 0.11152489483356476,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 1305
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0848214285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3493.0,
      "completions/mean_length": 899.0469360351562,
      "completions/mean_terminated_length": 602.743896484375,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 12.205247813411079,
      "grad_norm": 0.16129587590694427,
      "learning_rate": 1e-06,
      "loss": -0.0273,
      "num_tokens": 762934004.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.13745088875293732,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 1306
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1607142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3596.0,
      "completions/mean_length": 1171.62060546875,
      "completions/mean_terminated_length": 611.6329345703125,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 12.214577259475218,
      "grad_norm": 0.13189013302326202,
      "learning_rate": 1e-06,
      "loss": -0.0717,
      "num_tokens": 763482600.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.11208830773830414,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 1307
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1618303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3406.0,
      "completions/mean_length": 1209.1373291015625,
      "completions/mean_terminated_length": 651.753662109375,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 12.223906705539358,
      "grad_norm": 0.1550699770450592,
      "learning_rate": 1e-06,
      "loss": -0.0749,
      "num_tokens": 764057955.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.1360626220703125,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 1308
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1662946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3482.0,
      "completions/mean_length": 1193.4921875,
      "completions/mean_terminated_length": 614.5448608398438,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 12.2332361516035,
      "grad_norm": 0.16716431081295013,
      "learning_rate": 1e-06,
      "loss": -0.0596,
      "num_tokens": 764603828.0,
      "reward": 0.637276828289032,
      "reward_std": 0.13778719305992126,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 1309
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3157.0,
      "completions/mean_length": 1084.1395263671875,
      "completions/mean_terminated_length": 622.8635864257812,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 12.242565597667639,
      "grad_norm": 0.13719680905342102,
      "learning_rate": 1e-06,
      "loss": -0.0996,
      "num_tokens": 765175065.0,
      "reward": 0.7042410969734192,
      "reward_std": 0.1506737321615219,
      "rewards/verify_math_reward/mean": 0.7042410969734192,
      "rewards/verify_math_reward/std": 0.45663803815841675,
      "step": 1310
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2353.0,
      "completions/mean_length": 890.0413208007812,
      "completions/mean_terminated_length": 545.2719116210938,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 12.251895043731778,
      "grad_norm": 0.16699302196502686,
      "learning_rate": 1e-06,
      "loss": -0.0749,
      "num_tokens": 765696534.0,
      "reward": 0.7265625596046448,
      "reward_std": 0.1439163088798523,
      "rewards/verify_math_reward/mean": 0.7265625,
      "rewards/verify_math_reward/std": 0.4459724426269531,
      "step": 1311
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1428571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3246.0,
      "completions/mean_length": 1121.9576416015625,
      "completions/mean_terminated_length": 626.2838745117188,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 12.261224489795918,
      "grad_norm": 0.15327630937099457,
      "learning_rate": 1e-06,
      "loss": -0.0915,
      "num_tokens": 766269864.0,
      "reward": 0.645089328289032,
      "reward_std": 0.14623567461967468,
      "rewards/verify_math_reward/mean": 0.6450892686843872,
      "rewards/verify_math_reward/std": 0.4787535071372986,
      "step": 1312
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2790.0,
      "completions/mean_length": 1061.8404541015625,
      "completions/mean_terminated_length": 637.2124633789062,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 12.270553935860057,
      "grad_norm": 0.16404862701892853,
      "learning_rate": 1e-06,
      "loss": -0.0649,
      "num_tokens": 766856337.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.14661727845668793,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 1313
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3308.0,
      "completions/mean_length": 991.9464721679688,
      "completions/mean_terminated_length": 588.771728515625,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 12.279883381924199,
      "grad_norm": 0.16358880698680878,
      "learning_rate": 1e-06,
      "loss": -0.0804,
      "num_tokens": 767414265.0,
      "reward": 0.707589328289032,
      "reward_std": 0.1519557535648346,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 1314
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4017.0,
      "completions/mean_length": 910.4654541015625,
      "completions/mean_terminated_length": 545.9514770507812,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 12.289212827988338,
      "grad_norm": 0.16791094839572906,
      "learning_rate": 1e-06,
      "loss": -0.0533,
      "num_tokens": 767933426.0,
      "reward": 0.7276785969734192,
      "reward_std": 0.126667320728302,
      "rewards/verify_math_reward/mean": 0.7276785969734192,
      "rewards/verify_math_reward/std": 0.4454030692577362,
      "step": 1315
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3946.0,
      "completions/mean_length": 1009.638427734375,
      "completions/mean_terminated_length": 595.5189819335938,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 12.298542274052478,
      "grad_norm": 0.14293956756591797,
      "learning_rate": 1e-06,
      "loss": -0.0599,
      "num_tokens": 768496558.0,
      "reward": 0.6796875596046448,
      "reward_std": 0.11892352253198624,
      "rewards/verify_math_reward/mean": 0.6796875,
      "rewards/verify_math_reward/std": 0.4668572247028351,
      "step": 1316
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3452.0,
      "completions/mean_length": 940.8917846679688,
      "completions/mean_terminated_length": 618.7835083007812,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 12.307871720116617,
      "grad_norm": 0.1451500505208969,
      "learning_rate": 1e-06,
      "loss": -0.0699,
      "num_tokens": 769086773.0,
      "reward": 0.7142857313156128,
      "reward_std": 0.15123826265335083,
      "rewards/verify_math_reward/mean": 0.7142857313156128,
      "rewards/verify_math_reward/std": 0.4520062506198883,
      "step": 1317
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1607142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3710.0,
      "completions/mean_length": 1201.2879638671875,
      "completions/mean_terminated_length": 646.9813842773438,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 12.317201166180759,
      "grad_norm": 0.18425531685352325,
      "learning_rate": 1e-06,
      "loss": -0.0944,
      "num_tokens": 769672855.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.16149938106536865,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 1318
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3025.0,
      "completions/mean_length": 937.6730346679688,
      "completions/mean_terminated_length": 585.0062255859375,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 12.326530612244898,
      "grad_norm": 0.1543116569519043,
      "learning_rate": 1e-06,
      "loss": -0.0459,
      "num_tokens": 770227658.0,
      "reward": 0.7366071939468384,
      "reward_std": 0.1331227421760559,
      "rewards/verify_math_reward/mean": 0.7366071343421936,
      "rewards/verify_math_reward/std": 0.44071969389915466,
      "step": 1319
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3912.0,
      "completions/mean_length": 1100.864990234375,
      "completions/mean_terminated_length": 659.8399658203125,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 12.335860058309038,
      "grad_norm": 0.11531640589237213,
      "learning_rate": 1e-06,
      "loss": -0.0922,
      "num_tokens": 770829393.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.11388886719942093,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 1320
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1796875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3950.0,
      "completions/mean_length": 1277.0101318359375,
      "completions/mean_terminated_length": 659.5170288085938,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 12.345189504373177,
      "grad_norm": 0.181630477309227,
      "learning_rate": 1e-06,
      "loss": -0.0907,
      "num_tokens": 771398762.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.14327509701251984,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644601345062,
      "step": 1321
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1607142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3082.0,
      "completions/mean_length": 1147.716552734375,
      "completions/mean_terminated_length": 583.1515502929688,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 12.354518950437317,
      "grad_norm": 0.1867125928401947,
      "learning_rate": 1e-06,
      "loss": -0.0749,
      "num_tokens": 771924100.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.1230238527059555,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 1322
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1551339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2390.0,
      "completions/mean_length": 1138.462158203125,
      "completions/mean_terminated_length": 595.4002685546875,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 12.363848396501458,
      "grad_norm": 0.14314530789852142,
      "learning_rate": 1e-06,
      "loss": -0.0666,
      "num_tokens": 772462098.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.1176842749118805,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 1323
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3459.0,
      "completions/mean_length": 1088.2879638671875,
      "completions/mean_terminated_length": 680.39794921875,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 12.373177842565598,
      "grad_norm": 0.17088662087917328,
      "learning_rate": 1e-06,
      "loss": -0.0926,
      "num_tokens": 773095028.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.1732172816991806,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 1324
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3560.0,
      "completions/mean_length": 1295.83154296875,
      "completions/mean_terminated_length": 649.6387329101562,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 12.382507288629737,
      "grad_norm": 0.1721210479736328,
      "learning_rate": 1e-06,
      "loss": -0.1125,
      "num_tokens": 773653757.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.13654935359954834,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159480571747,
      "step": 1325
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1863839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3510.0,
      "completions/mean_length": 1335.341552734375,
      "completions/mean_terminated_length": 702.9273071289062,
      "completions/min_length": 188.0,
      "completions/min_terminated_length": 188.0,
      "epoch": 12.391836734693877,
      "grad_norm": 0.1628490537405014,
      "learning_rate": 1e-06,
      "loss": -0.087,
      "num_tokens": 774259151.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.1614226996898651,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263837933540344,
      "step": 1326
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3174.0,
      "completions/mean_length": 1026.2723388671875,
      "completions/mean_terminated_length": 583.2592163085938,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 12.401166180758018,
      "grad_norm": 0.16476882994174957,
      "learning_rate": 1e-06,
      "loss": -0.0607,
      "num_tokens": 774794467.0,
      "reward": 0.6796875596046448,
      "reward_std": 0.13711318373680115,
      "rewards/verify_math_reward/mean": 0.6796875,
      "rewards/verify_math_reward/std": 0.4668572247028351,
      "step": 1327
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3819.0,
      "completions/mean_length": 1035.35498046875,
      "completions/mean_terminated_length": 620.2864379882812,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 12.410495626822158,
      "grad_norm": 0.17252616584300995,
      "learning_rate": 1e-06,
      "loss": -0.0739,
      "num_tokens": 775373065.0,
      "reward": 0.7209821939468384,
      "reward_std": 0.1640915721654892,
      "rewards/verify_math_reward/mean": 0.7209821343421936,
      "rewards/verify_math_reward/std": 0.448766827583313,
      "step": 1328
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3075.0,
      "completions/mean_length": 994.8449096679688,
      "completions/mean_terminated_length": 578.7405395507812,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 12.419825072886297,
      "grad_norm": 0.15286733210086823,
      "learning_rate": 1e-06,
      "loss": -0.0523,
      "num_tokens": 775920454.0,
      "reward": 0.707589328289032,
      "reward_std": 0.12287301570177078,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512405037879944,
      "step": 1329
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2681.0,
      "completions/mean_length": 1097.44873046875,
      "completions/mean_terminated_length": 624.8087768554688,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 12.429154518950437,
      "grad_norm": 0.16014844179153442,
      "learning_rate": 1e-06,
      "loss": -0.0933,
      "num_tokens": 776505160.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.14774663746356964,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 1330
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3433.0,
      "completions/mean_length": 1037.69873046875,
      "completions/mean_terminated_length": 605.2509765625,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 12.438483965014576,
      "grad_norm": 0.179546520113945,
      "learning_rate": 1e-06,
      "loss": -0.0963,
      "num_tokens": 777067834.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.1568765640258789,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.4600566029548645,
      "step": 1331
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4069.0,
      "completions/mean_length": 865.763427734375,
      "completions/mean_terminated_length": 574.9635009765625,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 12.447813411078718,
      "grad_norm": 0.1690395474433899,
      "learning_rate": 1e-06,
      "loss": -0.039,
      "num_tokens": 777626478.0,
      "reward": 0.7578125596046448,
      "reward_std": 0.14909958839416504,
      "rewards/verify_math_reward/mean": 0.7578125,
      "rewards/verify_math_reward/std": 0.428646445274353,
      "step": 1332
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1607142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3603.0,
      "completions/mean_length": 1169.364990234375,
      "completions/mean_terminated_length": 608.9454345703125,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 12.457142857142857,
      "grad_norm": 0.16161946952342987,
      "learning_rate": 1e-06,
      "loss": -0.0618,
      "num_tokens": 778185405.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.12831632792949677,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 1333
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1450892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3180.0,
      "completions/mean_length": 1130.149658203125,
      "completions/mean_terminated_length": 626.8068237304688,
      "completions/min_length": 203.0,
      "completions/min_terminated_length": 203.0,
      "epoch": 12.466472303206997,
      "grad_norm": 0.20989133417606354,
      "learning_rate": 1e-06,
      "loss": -0.0848,
      "num_tokens": 778757603.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.1842365711927414,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.4664256274700165,
      "step": 1334
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4027.0,
      "completions/mean_length": 845.6183471679688,
      "completions/mean_terminated_length": 582.9215698242188,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 12.475801749271136,
      "grad_norm": 0.13798098266124725,
      "learning_rate": 1e-06,
      "loss": -0.0558,
      "num_tokens": 779321461.0,
      "reward": 0.7566964626312256,
      "reward_std": 0.12467243522405624,
      "rewards/verify_math_reward/mean": 0.7566964030265808,
      "rewards/verify_math_reward/std": 0.4293164908885956,
      "step": 1335
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1104910714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3313.0,
      "completions/mean_length": 989.76123046875,
      "completions/mean_terminated_length": 603.9171752929688,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 12.485131195335278,
      "grad_norm": 0.1599721610546112,
      "learning_rate": 1e-06,
      "loss": -0.0569,
      "num_tokens": 779897551.0,
      "reward": 0.707589328289032,
      "reward_std": 0.15213866531848907,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 1336
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4001.0,
      "completions/mean_length": 992.7935791015625,
      "completions/mean_terminated_length": 594.1448364257812,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 12.494460641399417,
      "grad_norm": 0.15016426146030426,
      "learning_rate": 1e-06,
      "loss": -0.0592,
      "num_tokens": 780461654.0,
      "reward": 0.7220982313156128,
      "reward_std": 0.12772038578987122,
      "rewards/verify_math_reward/mean": 0.7220982313156128,
      "rewards/verify_math_reward/std": 0.44821488857269287,
      "step": 1337
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3224.0,
      "completions/mean_length": 985.8348388671875,
      "completions/mean_terminated_length": 629.9452514648438,
      "completions/min_length": 179.0,
      "completions/min_terminated_length": 179.0,
      "epoch": 12.503790087463557,
      "grad_norm": 0.1614190638065338,
      "learning_rate": 1e-06,
      "loss": -0.0595,
      "num_tokens": 781057906.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.15349344909191132,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 1338
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3702.0,
      "completions/mean_length": 1130.333740234375,
      "completions/mean_terminated_length": 662.8772583007812,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 12.513119533527696,
      "grad_norm": 0.1414966583251953,
      "learning_rate": 1e-06,
      "loss": -0.066,
      "num_tokens": 781671941.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.12843577563762665,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 1339
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3799.0,
      "completions/mean_length": 1101.6138916015625,
      "completions/mean_terminated_length": 669.4738159179688,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 12.522448979591836,
      "grad_norm": 0.15916673839092255,
      "learning_rate": 1e-06,
      "loss": -0.0725,
      "num_tokens": 782286107.0,
      "reward": 0.6908482313156128,
      "reward_std": 0.16003471612930298,
      "rewards/verify_math_reward/mean": 0.6908482313156128,
      "rewards/verify_math_reward/std": 0.46240198612213135,
      "step": 1340
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3153.0,
      "completions/mean_length": 1056.875,
      "completions/mean_terminated_length": 666.4584350585938,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 12.531778425655977,
      "grad_norm": 0.15327374637126923,
      "learning_rate": 1e-06,
      "loss": -0.0727,
      "num_tokens": 782898571.0,
      "reward": 0.691964328289032,
      "reward_std": 0.13132219016551971,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 1341
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3555.0,
      "completions/mean_length": 1058.51123046875,
      "completions/mean_terminated_length": 655.3046875,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 12.541107871720117,
      "grad_norm": 0.1456425040960312,
      "learning_rate": 1e-06,
      "loss": -0.0339,
      "num_tokens": 783505549.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.10836746543645859,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 1342
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1439732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4049.0,
      "completions/mean_length": 1128.040283203125,
      "completions/mean_terminated_length": 628.86572265625,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 12.550437317784256,
      "grad_norm": 0.17717291414737701,
      "learning_rate": 1e-06,
      "loss": -0.12,
      "num_tokens": 784070337.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.20249111950397491,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179922461509705,
      "step": 1343
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2402.0,
      "completions/mean_length": 934.82373046875,
      "completions/mean_terminated_length": 581.8386840820312,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 12.559766763848396,
      "grad_norm": 0.14286023378372192,
      "learning_rate": 1e-06,
      "loss": -0.0509,
      "num_tokens": 784619763.0,
      "reward": 0.7299107313156128,
      "reward_std": 0.12043054401874542,
      "rewards/verify_math_reward/mean": 0.7299107313156128,
      "rewards/verify_math_reward/std": 0.44425368309020996,
      "step": 1344
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1395089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3826.0,
      "completions/mean_length": 1145.3035888671875,
      "completions/mean_terminated_length": 666.9157104492188,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 12.569096209912537,
      "grad_norm": 0.19437940418720245,
      "learning_rate": 1e-06,
      "loss": -0.0767,
      "num_tokens": 785227363.0,
      "reward": 0.6328125,
      "reward_std": 0.14864563941955566,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 1345
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4016.0,
      "completions/mean_length": 869.9185791015625,
      "completions/mean_terminated_length": 579.49267578125,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 12.578425655976677,
      "grad_norm": 0.1386461853981018,
      "learning_rate": 1e-06,
      "loss": -0.0496,
      "num_tokens": 785798018.0,
      "reward": 0.7578125596046448,
      "reward_std": 0.1179899051785469,
      "rewards/verify_math_reward/mean": 0.7578125,
      "rewards/verify_math_reward/std": 0.428646445274353,
      "step": 1346
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3017.0,
      "completions/mean_length": 982.2366333007812,
      "completions/mean_terminated_length": 612.9388427734375,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 12.587755102040816,
      "grad_norm": 0.14988870918750763,
      "learning_rate": 1e-06,
      "loss": -0.0518,
      "num_tokens": 786379310.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.1299756020307541,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 1347
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3508.0,
      "completions/mean_length": 971.935302734375,
      "completions/mean_terminated_length": 601.4157104492188,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 12.597084548104956,
      "grad_norm": 0.15420491993427277,
      "learning_rate": 1e-06,
      "loss": -0.0538,
      "num_tokens": 786949004.0,
      "reward": 0.7209821939468384,
      "reward_std": 0.11629742383956909,
      "rewards/verify_math_reward/mean": 0.7209821343421936,
      "rewards/verify_math_reward/std": 0.448766827583313,
      "step": 1348
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3264.0,
      "completions/mean_length": 1083.8348388671875,
      "completions/mean_terminated_length": 649.1289672851562,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 12.606413994169095,
      "grad_norm": 0.16972795128822327,
      "learning_rate": 1e-06,
      "loss": -0.0685,
      "num_tokens": 787555400.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.16476556658744812,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 1349
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3121.0,
      "completions/mean_length": 999.50341796875,
      "completions/mean_terminated_length": 566.1514282226562,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 12.615743440233237,
      "grad_norm": 0.1987408697605133,
      "learning_rate": 1e-06,
      "loss": -0.0782,
      "num_tokens": 788093763.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.14650921523571014,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.4664256274700165,
      "step": 1350
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2623.0,
      "completions/mean_length": 1067.04248046875,
      "completions/mean_terminated_length": 634.3341674804688,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 12.625072886297376,
      "grad_norm": 0.16759316623210907,
      "learning_rate": 1e-06,
      "loss": -0.06,
      "num_tokens": 788680577.0,
      "reward": 0.625,
      "reward_std": 0.15161804854869843,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1351
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1372767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3229.0,
      "completions/mean_length": 1084.805908203125,
      "completions/mean_terminated_length": 605.6636352539062,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 12.634402332361516,
      "grad_norm": 0.15915155410766602,
      "learning_rate": 1e-06,
      "loss": -0.0656,
      "num_tokens": 789236667.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.12839441001415253,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159182548523,
      "step": 1352
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1540178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3880.0,
      "completions/mean_length": 1142.34716796875,
      "completions/mean_terminated_length": 604.61083984375,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 12.643731778425655,
      "grad_norm": 0.16895677149295807,
      "learning_rate": 1e-06,
      "loss": -0.0636,
      "num_tokens": 789788994.0,
      "reward": 0.691964328289032,
      "reward_std": 0.11675135791301727,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 1353
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3380.0,
      "completions/mean_length": 841.7656860351562,
      "completions/mean_terminated_length": 518.338623046875,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 12.653061224489797,
      "grad_norm": 0.14239118993282318,
      "learning_rate": 1e-06,
      "loss": -0.0478,
      "num_tokens": 790288776.0,
      "reward": 0.7433035969734192,
      "reward_std": 0.11288176476955414,
      "rewards/verify_math_reward/mean": 0.7433035969734192,
      "rewards/verify_math_reward/std": 0.43705445528030396,
      "step": 1354
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3616.0,
      "completions/mean_length": 980.6116333007812,
      "completions/mean_terminated_length": 624.1243896484375,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 12.662390670553936,
      "grad_norm": 0.18198366463184357,
      "learning_rate": 1e-06,
      "loss": -0.0801,
      "num_tokens": 790883732.0,
      "reward": 0.691964328289032,
      "reward_std": 0.1643179953098297,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 1355
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1584821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3823.0,
      "completions/mean_length": 1191.4342041015625,
      "completions/mean_terminated_length": 644.42041015625,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 12.671720116618076,
      "grad_norm": 0.15740332007408142,
      "learning_rate": 1e-06,
      "loss": -0.0993,
      "num_tokens": 791471297.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.1413985788822174,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 1356
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1584821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4054.0,
      "completions/mean_length": 1186.83935546875,
      "completions/mean_terminated_length": 638.960205078125,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 12.681049562682215,
      "grad_norm": 0.1690467894077301,
      "learning_rate": 1e-06,
      "loss": -0.0744,
      "num_tokens": 792039857.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.1469959318637848,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 1357
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3684.0,
      "completions/mean_length": 931.4576416015625,
      "completions/mean_terminated_length": 556.1373291015625,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 12.690379008746355,
      "grad_norm": 0.15457387268543243,
      "learning_rate": 1e-06,
      "loss": -0.0274,
      "num_tokens": 792571555.0,
      "reward": 0.7455357313156128,
      "reward_std": 0.10581875592470169,
      "rewards/verify_math_reward/mean": 0.7455357313156128,
      "rewards/verify_math_reward/std": 0.4358029067516327,
      "step": 1358
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3765.0,
      "completions/mean_length": 935.700927734375,
      "completions/mean_terminated_length": 582.8139038085938,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 12.699708454810496,
      "grad_norm": 0.1606937199831009,
      "learning_rate": 1e-06,
      "loss": -0.0737,
      "num_tokens": 793131359.0,
      "reward": 0.7745535969734192,
      "reward_std": 0.13354459404945374,
      "rewards/verify_math_reward/mean": 0.7745535969734192,
      "rewards/verify_math_reward/std": 0.41810935735702515,
      "step": 1359
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3880.0,
      "completions/mean_length": 1071.7020263671875,
      "completions/mean_terminated_length": 648.4542236328125,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 12.709037900874636,
      "grad_norm": 0.15520969033241272,
      "learning_rate": 1e-06,
      "loss": -0.0459,
      "num_tokens": 793724324.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.13064706325531006,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 1360
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2891.0,
      "completions/mean_length": 922.3192138671875,
      "completions/mean_terminated_length": 602.6117553710938,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 12.718367346938775,
      "grad_norm": 0.17056338489055634,
      "learning_rate": 1e-06,
      "loss": -0.0416,
      "num_tokens": 794296146.0,
      "reward": 0.7008928656578064,
      "reward_std": 0.13241805136203766,
      "rewards/verify_math_reward/mean": 0.7008928656578064,
      "rewards/verify_math_reward/std": 0.458122581243515,
      "step": 1361
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3659.0,
      "completions/mean_length": 1150.1942138671875,
      "completions/mean_terminated_length": 707.756103515625,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 12.727696793002915,
      "grad_norm": 0.1725529283285141,
      "learning_rate": 1e-06,
      "loss": -0.0432,
      "num_tokens": 794938752.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.15011100471019745,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 1362
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3533.0,
      "completions/mean_length": 1127.302490234375,
      "completions/mean_terminated_length": 641.5155639648438,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 12.737026239067056,
      "grad_norm": 0.1569979339838028,
      "learning_rate": 1e-06,
      "loss": -0.0809,
      "num_tokens": 795526039.0,
      "reward": 0.6395089626312256,
      "reward_std": 0.1566508710384369,
      "rewards/verify_math_reward/mean": 0.6395089030265808,
      "rewards/verify_math_reward/std": 0.4804111421108246,
      "step": 1363
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3809.0,
      "completions/mean_length": 966.9944458007812,
      "completions/mean_terminated_length": 626.2116088867188,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 12.746355685131196,
      "grad_norm": 0.17467598617076874,
      "learning_rate": 1e-06,
      "loss": -0.0522,
      "num_tokens": 796133290.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.17374537885189056,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1364
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3222.0,
      "completions/mean_length": 974.9375610351562,
      "completions/mean_terminated_length": 573.9949340820312,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 12.755685131195335,
      "grad_norm": 0.15648187696933746,
      "learning_rate": 1e-06,
      "loss": -0.0602,
      "num_tokens": 796669122.0,
      "reward": 0.7176339626312256,
      "reward_std": 0.12666912376880646,
      "rewards/verify_math_reward/mean": 0.7176339030265808,
      "rewards/verify_math_reward/std": 0.4504019320011139,
      "step": 1365
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3885.0,
      "completions/mean_length": 1094.693115234375,
      "completions/mean_terminated_length": 652.75927734375,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 12.765014577259475,
      "grad_norm": 0.1611330360174179,
      "learning_rate": 1e-06,
      "loss": -0.057,
      "num_tokens": 797269383.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.12192729860544205,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 1366
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1540178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3783.0,
      "completions/mean_length": 1148.703125,
      "completions/mean_terminated_length": 612.1240234375,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 12.774344023323614,
      "grad_norm": 0.14559553563594818,
      "learning_rate": 1e-06,
      "loss": -0.0435,
      "num_tokens": 797819461.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.09416642040014267,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 1367
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3336.0,
      "completions/mean_length": 989.9141235351562,
      "completions/mean_terminated_length": 577.601806640625,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 12.783673469387756,
      "grad_norm": 0.14515119791030884,
      "learning_rate": 1e-06,
      "loss": -0.0485,
      "num_tokens": 798362488.0,
      "reward": 0.6930803656578064,
      "reward_std": 0.11245782673358917,
      "rewards/verify_math_reward/mean": 0.6930803656578064,
      "rewards/verify_math_reward/std": 0.46147337555885315,
      "step": 1368
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2791.0,
      "completions/mean_length": 987.0123291015625,
      "completions/mean_terminated_length": 622.6172485351562,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 12.793002915451895,
      "grad_norm": 0.15400585532188416,
      "learning_rate": 1e-06,
      "loss": -0.038,
      "num_tokens": 798963635.0,
      "reward": 0.731026828289032,
      "reward_std": 0.1411721557378769,
      "rewards/verify_math_reward/mean": 0.7310267686843872,
      "rewards/verify_math_reward/std": 0.44367367029190063,
      "step": 1369
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3830.0,
      "completions/mean_length": 1085.3248291015625,
      "completions/mean_terminated_length": 619.7564086914062,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 12.802332361516035,
      "grad_norm": 0.14347673952579498,
      "learning_rate": 1e-06,
      "loss": -0.044,
      "num_tokens": 799535326.0,
      "reward": 0.7064732313156128,
      "reward_std": 0.11144751310348511,
      "rewards/verify_math_reward/mean": 0.7064732313156128,
      "rewards/verify_math_reward/std": 0.4556320011615753,
      "step": 1370
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3040.0,
      "completions/mean_length": 1125.685302734375,
      "completions/mean_terminated_length": 648.5880737304688,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 12.811661807580174,
      "grad_norm": 0.17199894785881042,
      "learning_rate": 1e-06,
      "loss": -0.0769,
      "num_tokens": 800139044.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.1446651816368103,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341992855072,
      "step": 1371
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4031.0,
      "completions/mean_length": 1044.76123046875,
      "completions/mean_terminated_length": 657.1195068359375,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 12.820991253644316,
      "grad_norm": 0.1765907108783722,
      "learning_rate": 1e-06,
      "loss": -0.066,
      "num_tokens": 800754854.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.16281278431415558,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 1372
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1450892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3554.0,
      "completions/mean_length": 1136.5546875,
      "completions/mean_terminated_length": 634.2989501953125,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 12.830320699708455,
      "grad_norm": 0.12316008657217026,
      "learning_rate": 1e-06,
      "loss": -0.039,
      "num_tokens": 801337439.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.08090193569660187,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 1373
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3276.0,
      "completions/mean_length": 999.1116333007812,
      "completions/mean_terminated_length": 583.5797729492188,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 12.839650145772595,
      "grad_norm": 0.16492483019828796,
      "learning_rate": 1e-06,
      "loss": -0.0626,
      "num_tokens": 801885483.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.1338823139667511,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.4628615975379944,
      "step": 1374
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3014.0,
      "completions/mean_length": 937.76123046875,
      "completions/mean_terminated_length": 580.7428588867188,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 12.848979591836734,
      "grad_norm": 0.16988950967788696,
      "learning_rate": 1e-06,
      "loss": -0.0443,
      "num_tokens": 802444965.0,
      "reward": 0.7343750596046448,
      "reward_std": 0.133134126663208,
      "rewards/verify_math_reward/mean": 0.734375,
      "rewards/verify_math_reward/std": 0.44191211462020874,
      "step": 1375
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3596.0,
      "completions/mean_length": 1011.1506958007812,
      "completions/mean_terminated_length": 632.3095092773438,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 12.858309037900874,
      "grad_norm": 0.17253868281841278,
      "learning_rate": 1e-06,
      "loss": -0.0664,
      "num_tokens": 803037588.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.16157494485378265,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 1376
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4080.0,
      "completions/mean_length": 1051.7991943359375,
      "completions/mean_terminated_length": 669.36181640625,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 12.867638483965015,
      "grad_norm": 0.1794116348028183,
      "learning_rate": 1e-06,
      "loss": -0.0778,
      "num_tokens": 803655384.0,
      "reward": 0.7142857313156128,
      "reward_std": 0.17630550265312195,
      "rewards/verify_math_reward/mean": 0.7142857313156128,
      "rewards/verify_math_reward/std": 0.4520062506198883,
      "step": 1377
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3605.0,
      "completions/mean_length": 1151.548095703125,
      "completions/mean_terminated_length": 687.4354248046875,
      "completions/min_length": 190.0,
      "completions/min_terminated_length": 190.0,
      "epoch": 12.876967930029155,
      "grad_norm": 0.1660955399274826,
      "learning_rate": 1e-06,
      "loss": -0.0599,
      "num_tokens": 804290515.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.1363229602575302,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 1378
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2397.0,
      "completions/mean_length": 881.24560546875,
      "completions/mean_terminated_length": 591.8394165039062,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 12.886297376093294,
      "grad_norm": 0.1700073927640915,
      "learning_rate": 1e-06,
      "loss": -0.0436,
      "num_tokens": 804873999.0,
      "reward": 0.7087053656578064,
      "reward_std": 0.12133026868104935,
      "rewards/verify_math_reward/mean": 0.7087053656578064,
      "rewards/verify_math_reward/std": 0.45461276173591614,
      "step": 1379
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3873.0,
      "completions/mean_length": 1056.1082763671875,
      "completions/mean_terminated_length": 643.854248046875,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 12.895626822157434,
      "grad_norm": 0.16197142004966736,
      "learning_rate": 1e-06,
      "loss": -0.0584,
      "num_tokens": 805468488.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.12644091248512268,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.46642565727233887,
      "step": 1380
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3025.0,
      "completions/mean_length": 1116.5023193359375,
      "completions/mean_terminated_length": 646.8656616210938,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 12.904956268221575,
      "grad_norm": 0.19323408603668213,
      "learning_rate": 1e-06,
      "loss": -0.0675,
      "num_tokens": 806066674.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.17228113114833832,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 1381
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0747767857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3866.0,
      "completions/mean_length": 858.4553833007812,
      "completions/mean_terminated_length": 596.796142578125,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 12.914285714285715,
      "grad_norm": 0.16140006482601166,
      "learning_rate": 1e-06,
      "loss": -0.0609,
      "num_tokens": 806641962.0,
      "reward": 0.7209821939468384,
      "reward_std": 0.13527238368988037,
      "rewards/verify_math_reward/mean": 0.7209821343421936,
      "rewards/verify_math_reward/std": 0.448766827583313,
      "step": 1382
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3261.0,
      "completions/mean_length": 898.3404541015625,
      "completions/mean_terminated_length": 563.197265625,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 12.923615160349854,
      "grad_norm": 0.21663758158683777,
      "learning_rate": 1e-06,
      "loss": -0.0501,
      "num_tokens": 807196603.0,
      "reward": 0.7433035969734192,
      "reward_std": 0.15800705552101135,
      "rewards/verify_math_reward/mean": 0.7433035969734192,
      "rewards/verify_math_reward/std": 0.43705442547798157,
      "step": 1383
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2941.0,
      "completions/mean_length": 1038.4107666015625,
      "completions/mean_terminated_length": 583.6923217773438,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 12.932944606413994,
      "grad_norm": 0.16240949928760529,
      "learning_rate": 1e-06,
      "loss": -0.075,
      "num_tokens": 807744307.0,
      "reward": 0.6986607313156128,
      "reward_std": 0.13226580619812012,
      "rewards/verify_math_reward/mean": 0.6986607313156128,
      "rewards/verify_math_reward/std": 0.4590960443019867,
      "step": 1384
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3557.0,
      "completions/mean_length": 950.5335083007812,
      "completions/mean_terminated_length": 594.958984375,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 12.942274052478133,
      "grad_norm": 0.16597980260849,
      "learning_rate": 1e-06,
      "loss": -0.0557,
      "num_tokens": 808312457.0,
      "reward": 0.7421875596046448,
      "reward_std": 0.15123896300792694,
      "rewards/verify_math_reward/mean": 0.7421875,
      "rewards/verify_math_reward/std": 0.43767455220222473,
      "step": 1385
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4072.0,
      "completions/mean_length": 991.810302734375,
      "completions/mean_terminated_length": 640.90185546875,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 12.951603498542275,
      "grad_norm": 0.18450036644935608,
      "learning_rate": 1e-06,
      "loss": -0.0523,
      "num_tokens": 808914599.0,
      "reward": 0.6796875596046448,
      "reward_std": 0.15612910687923431,
      "rewards/verify_math_reward/mean": 0.6796875,
      "rewards/verify_math_reward/std": 0.4668572247028351,
      "step": 1386
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2970.0,
      "completions/mean_length": 932.1272583007812,
      "completions/mean_terminated_length": 543.5814208984375,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 12.960932944606414,
      "grad_norm": 0.17737141251564026,
      "learning_rate": 1e-06,
      "loss": -0.0453,
      "num_tokens": 809444961.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.12140876054763794,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 1387
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3306.0,
      "completions/mean_length": 892.8772583007812,
      "completions/mean_terminated_length": 587.4450073242188,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 12.970262390670554,
      "grad_norm": 0.14486269652843475,
      "learning_rate": 1e-06,
      "loss": -0.0197,
      "num_tokens": 810012707.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.10554774105548859,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1388
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3823.0,
      "completions/mean_length": 1022.1172485351562,
      "completions/mean_terminated_length": 582.9910888671875,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 12.979591836734693,
      "grad_norm": 0.16290737688541412,
      "learning_rate": 1e-06,
      "loss": -0.0611,
      "num_tokens": 810560988.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.10772695392370224,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 1389
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3630.0,
      "completions/mean_length": 1108.2176513671875,
      "completions/mean_terminated_length": 650.6293334960938,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 12.988921282798835,
      "grad_norm": 0.16302025318145752,
      "learning_rate": 1e-06,
      "loss": -0.0589,
      "num_tokens": 811155095.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.13200436532497406,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179925441741943,
      "step": 1390
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.11931818181818177,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3962.0,
      "completions/mean_length": 1054.977294921875,
      "completions/mean_terminated_length": 642.9677124023438,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 12.998250728862974,
      "grad_norm": 0.13653789460659027,
      "learning_rate": 1e-06,
      "loss": -0.0568,
      "num_tokens": 811746142.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.10843275487422943,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 1391
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3752.0,
      "completions/mean_length": 895.8449096679688,
      "completions/mean_terminated_length": 607.7530517578125,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 13.00932944606414,
      "grad_norm": 0.1684548407793045,
      "learning_rate": 1e-06,
      "loss": -0.0389,
      "num_tokens": 812334363.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.15364569425582886,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 1392
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4058.0,
      "completions/mean_length": 921.6116333007812,
      "completions/mean_terminated_length": 631.6248779296875,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 13.018658892128279,
      "grad_norm": 0.14841940999031067,
      "learning_rate": 1e-06,
      "loss": -0.0493,
      "num_tokens": 812932295.0,
      "reward": 0.7511160969734192,
      "reward_std": 0.12967318296432495,
      "rewards/verify_math_reward/mean": 0.7511160969734192,
      "rewards/verify_math_reward/std": 0.43260788917541504,
      "step": 1393
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3050.0,
      "completions/mean_length": 1115.2723388671875,
      "completions/mean_terminated_length": 667.5892333984375,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 13.02798833819242,
      "grad_norm": 0.16736483573913574,
      "learning_rate": 1e-06,
      "loss": -0.0389,
      "num_tokens": 813542875.0,
      "reward": 0.59375,
      "reward_std": 0.14169208705425262,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 1394
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4096.0,
      "completions/mean_length": 935.89404296875,
      "completions/mean_terminated_length": 569.9041137695312,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 13.03731778425656,
      "grad_norm": 0.1944524496793747,
      "learning_rate": 1e-06,
      "loss": -0.0419,
      "num_tokens": 814088036.0,
      "reward": 0.6986607313156128,
      "reward_std": 0.12711238861083984,
      "rewards/verify_math_reward/mean": 0.6986607313156128,
      "rewards/verify_math_reward/std": 0.4590960443019867,
      "step": 1395
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4048.0,
      "completions/mean_length": 1033.958740234375,
      "completions/mean_terminated_length": 596.5242309570312,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 13.0466472303207,
      "grad_norm": 0.15413354337215424,
      "learning_rate": 1e-06,
      "loss": -0.0942,
      "num_tokens": 814647751.0,
      "reward": 0.691964328289032,
      "reward_std": 0.12749329209327698,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 1396
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0814732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2966.0,
      "completions/mean_length": 846.9766235351562,
      "completions/mean_terminated_length": 558.78857421875,
      "completions/min_length": 98.0,
      "completions/min_terminated_length": 98.0,
      "epoch": 13.055976676384839,
      "grad_norm": 0.15785396099090576,
      "learning_rate": 1e-06,
      "loss": -0.0572,
      "num_tokens": 815200586.0,
      "reward": 0.754464328289032,
      "reward_std": 0.1263321340084076,
      "rewards/verify_math_reward/mean": 0.7544642686843872,
      "rewards/verify_math_reward/std": 0.4306447505950928,
      "step": 1397
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3236.0,
      "completions/mean_length": 838.9967041015625,
      "completions/mean_terminated_length": 567.2515258789062,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 13.06530612244898,
      "grad_norm": 0.1844933182001114,
      "learning_rate": 1e-06,
      "loss": -0.0308,
      "num_tokens": 815747591.0,
      "reward": 0.7812500596046448,
      "reward_std": 0.14556418359279633,
      "rewards/verify_math_reward/mean": 0.78125,
      "rewards/verify_math_reward/std": 0.41362953186035156,
      "step": 1398
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2729.0,
      "completions/mean_length": 996.3449096679688,
      "completions/mean_terminated_length": 602.5521850585938,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 13.07463556851312,
      "grad_norm": 0.14525890350341797,
      "learning_rate": 1e-06,
      "loss": -0.0599,
      "num_tokens": 816315044.0,
      "reward": 0.7142857313156128,
      "reward_std": 0.12583360075950623,
      "rewards/verify_math_reward/mean": 0.7142857313156128,
      "rewards/verify_math_reward/std": 0.4520062506198883,
      "step": 1399
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3853.0,
      "completions/mean_length": 975.450927734375,
      "completions/mean_terminated_length": 614.0422973632812,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 13.08396501457726,
      "grad_norm": 0.1412966400384903,
      "learning_rate": 1e-06,
      "loss": -0.0654,
      "num_tokens": 816905056.0,
      "reward": 0.699776828289032,
      "reward_std": 0.11283759027719498,
      "rewards/verify_math_reward/mean": 0.6997767686843872,
      "rewards/verify_math_reward/std": 0.4586109220981598,
      "step": 1400
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3544.0,
      "completions/mean_length": 983.53466796875,
      "completions/mean_terminated_length": 623.062255859375,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 13.093294460641399,
      "grad_norm": 0.18408381938934326,
      "learning_rate": 1e-06,
      "loss": -0.0631,
      "num_tokens": 817487207.0,
      "reward": 0.691964328289032,
      "reward_std": 0.14045649766921997,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 1401
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3700.0,
      "completions/mean_length": 898.67529296875,
      "completions/mean_terminated_length": 606.5932006835938,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 13.102623906705539,
      "grad_norm": 0.16722357273101807,
      "learning_rate": 1e-06,
      "loss": -0.0437,
      "num_tokens": 818077444.0,
      "reward": 0.7332589626312256,
      "reward_std": 0.1280987709760666,
      "rewards/verify_math_reward/mean": 0.7332589030265808,
      "rewards/verify_math_reward/std": 0.4425028860569,
      "step": 1402
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3756.0,
      "completions/mean_length": 969.7254638671875,
      "completions/mean_terminated_length": 590.1902465820312,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 13.11195335276968,
      "grad_norm": 0.17252375185489655,
      "learning_rate": 1e-06,
      "loss": -0.0413,
      "num_tokens": 818644366.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.1385364830493927,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900800228119,
      "step": 1403
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3850.0,
      "completions/mean_length": 1111.3773193359375,
      "completions/mean_terminated_length": 622.984375,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 13.12128279883382,
      "grad_norm": 0.16588982939720154,
      "learning_rate": 1e-06,
      "loss": -0.0521,
      "num_tokens": 819215584.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.13387978076934814,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 1404
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1551339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2689.0,
      "completions/mean_length": 1221.2991943359375,
      "completions/mean_terminated_length": 693.4478149414062,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 13.130612244897959,
      "grad_norm": 0.14792756736278534,
      "learning_rate": 1e-06,
      "loss": -0.0558,
      "num_tokens": 819836444.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.14838281273841858,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 1405
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4055.0,
      "completions/mean_length": 996.1785888671875,
      "completions/mean_terminated_length": 645.7639770507812,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 13.139941690962099,
      "grad_norm": 0.15390346944332123,
      "learning_rate": 1e-06,
      "loss": -0.0838,
      "num_tokens": 820450428.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.16604435443878174,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 1406
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3018.0,
      "completions/mean_length": 875.4486694335938,
      "completions/mean_terminated_length": 581.244873046875,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 13.14927113702624,
      "grad_norm": 0.17589004337787628,
      "learning_rate": 1e-06,
      "loss": -0.0609,
      "num_tokens": 821008510.0,
      "reward": 0.7366071939468384,
      "reward_std": 0.14838533103466034,
      "rewards/verify_math_reward/mean": 0.7366071343421936,
      "rewards/verify_math_reward/std": 0.44071969389915466,
      "step": 1407
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2914.0,
      "completions/mean_length": 1055.90966796875,
      "completions/mean_terminated_length": 621.6109619140625,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 13.15860058309038,
      "grad_norm": 0.15691091120243073,
      "learning_rate": 1e-06,
      "loss": -0.0646,
      "num_tokens": 821589757.0,
      "reward": 0.6662946939468384,
      "reward_std": 0.13410800695419312,
      "rewards/verify_math_reward/mean": 0.6662946343421936,
      "rewards/verify_math_reward/std": 0.47179922461509705,
      "step": 1408
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3801.0,
      "completions/mean_length": 955.224365234375,
      "completions/mean_terminated_length": 595.8320922851562,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 13.167930029154519,
      "grad_norm": 0.14748361706733704,
      "learning_rate": 1e-06,
      "loss": -0.0478,
      "num_tokens": 822175398.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.11144820600748062,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.48199835419654846,
      "step": 1409
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3712.0,
      "completions/mean_length": 907.6998291015625,
      "completions/mean_terminated_length": 577.8756103515625,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 13.177259475218658,
      "grad_norm": 0.14740511775016785,
      "learning_rate": 1e-06,
      "loss": -0.0557,
      "num_tokens": 822738833.0,
      "reward": 0.7812500596046448,
      "reward_std": 0.11257615685462952,
      "rewards/verify_math_reward/mean": 0.78125,
      "rewards/verify_math_reward/std": 0.41362953186035156,
      "step": 1410
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3468.0,
      "completions/mean_length": 849.0692138671875,
      "completions/mean_terminated_length": 552.45556640625,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 13.186588921282798,
      "grad_norm": 0.17583271861076355,
      "learning_rate": 1e-06,
      "loss": -0.0719,
      "num_tokens": 823276783.0,
      "reward": 0.7845982313156128,
      "reward_std": 0.1518462747335434,
      "rewards/verify_math_reward/mean": 0.7845982313156128,
      "rewards/verify_math_reward/std": 0.4113304018974304,
      "step": 1411
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3816.0,
      "completions/mean_length": 1038.404052734375,
      "completions/mean_terminated_length": 628.144287109375,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 13.19591836734694,
      "grad_norm": 0.17455369234085083,
      "learning_rate": 1e-06,
      "loss": -0.0501,
      "num_tokens": 823867865.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.15567517280578613,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 1412
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0982142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3561.0,
      "completions/mean_length": 933.5859985351562,
      "completions/mean_terminated_length": 589.1646118164062,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 13.205247813411079,
      "grad_norm": 0.14539392292499542,
      "learning_rate": 1e-06,
      "loss": -0.0689,
      "num_tokens": 824433182.0,
      "reward": 0.691964328289032,
      "reward_std": 0.12418389320373535,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 1413
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.15625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3761.0,
      "completions/mean_length": 1150.328125,
      "completions/mean_terminated_length": 604.8333129882812,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 13.214577259475218,
      "grad_norm": 0.17322084307670593,
      "learning_rate": 1e-06,
      "loss": -0.0611,
      "num_tokens": 824979132.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.14079168438911438,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47667041420936584,
      "step": 1414
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1283482142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2682.0,
      "completions/mean_length": 1060.2701416015625,
      "completions/mean_terminated_length": 613.2676391601562,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 13.223906705539358,
      "grad_norm": 0.1663852334022522,
      "learning_rate": 1e-06,
      "loss": -0.0528,
      "num_tokens": 825554590.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.14458851516246796,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.4664256274700165,
      "step": 1415
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3395.0,
      "completions/mean_length": 928.8516235351562,
      "completions/mean_terminated_length": 557.6396484375,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 13.2332361516035,
      "grad_norm": 0.17487259209156036,
      "learning_rate": 1e-06,
      "loss": -0.081,
      "num_tokens": 826097681.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.14139604568481445,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 1416
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2846.0,
      "completions/mean_length": 908.1138916015625,
      "completions/mean_terminated_length": 616.89404296875,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 13.242565597667639,
      "grad_norm": 0.16165295243263245,
      "learning_rate": 1e-06,
      "loss": -0.0461,
      "num_tokens": 826684391.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.13380561769008636,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613664388656616,
      "step": 1417
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3473.0,
      "completions/mean_length": 953.974365234375,
      "completions/mean_terminated_length": 624.662109375,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 13.251895043731778,
      "grad_norm": 0.14920338988304138,
      "learning_rate": 1e-06,
      "loss": -0.0528,
      "num_tokens": 827284216.0,
      "reward": 0.7198660969734192,
      "reward_std": 0.13373075425624847,
      "rewards/verify_math_reward/mean": 0.7198660969734192,
      "rewards/verify_math_reward/std": 0.44931530952453613,
      "step": 1418
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3311.0,
      "completions/mean_length": 935.3761596679688,
      "completions/mean_terminated_length": 663.3709106445312,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 13.261224489795918,
      "grad_norm": 0.21821512281894684,
      "learning_rate": 1e-06,
      "loss": -0.0246,
      "num_tokens": 827920321.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.1482323855161667,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 1419
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3519.0,
      "completions/mean_length": 979.3527221679688,
      "completions/mean_terminated_length": 556.689453125,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 13.270553935860057,
      "grad_norm": 0.18146897852420807,
      "learning_rate": 1e-06,
      "loss": -0.0551,
      "num_tokens": 828454429.0,
      "reward": 0.7064732313156128,
      "reward_std": 0.1554897278547287,
      "rewards/verify_math_reward/mean": 0.7064732313156128,
      "rewards/verify_math_reward/std": 0.4556320011615753,
      "step": 1420
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3690.0,
      "completions/mean_length": 849.8895263671875,
      "completions/mean_terminated_length": 570.5272827148438,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 13.279883381924199,
      "grad_norm": 0.1580151617527008,
      "learning_rate": 1e-06,
      "loss": -0.0229,
      "num_tokens": 829005962.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.10554774850606918,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1421
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4048.0,
      "completions/mean_length": 897.138427734375,
      "completions/mean_terminated_length": 630.2442626953125,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 13.289212827988338,
      "grad_norm": 0.1608230322599411,
      "learning_rate": 1e-06,
      "loss": -0.0536,
      "num_tokens": 829619246.0,
      "reward": 0.7176339626312256,
      "reward_std": 0.14771202206611633,
      "rewards/verify_math_reward/mean": 0.7176339030265808,
      "rewards/verify_math_reward/std": 0.4504019320011139,
      "step": 1422
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0870535714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3960.0,
      "completions/mean_length": 892.9810791015625,
      "completions/mean_terminated_length": 587.5587158203125,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 13.298542274052478,
      "grad_norm": 0.1555834412574768,
      "learning_rate": 1e-06,
      "loss": -0.0328,
      "num_tokens": 830189197.0,
      "reward": 0.7087053656578064,
      "reward_std": 0.12569092214107513,
      "rewards/verify_math_reward/mean": 0.7087053656578064,
      "rewards/verify_math_reward/std": 0.45461276173591614,
      "step": 1423
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3872.0,
      "completions/mean_length": 1030.685302734375,
      "completions/mean_terminated_length": 641.2553100585938,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 13.307871720116617,
      "grad_norm": 0.15356653928756714,
      "learning_rate": 1e-06,
      "loss": -0.0609,
      "num_tokens": 830792419.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.12456478923559189,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1424
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3138.0,
      "completions/mean_length": 998.91748046875,
      "completions/mean_terminated_length": 627.2674560546875,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 13.317201166180759,
      "grad_norm": 0.17770841717720032,
      "learning_rate": 1e-06,
      "loss": -0.0504,
      "num_tokens": 831374457.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.16908234357833862,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975659370422363,
      "step": 1425
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3226.0,
      "completions/mean_length": 1007.7969360351562,
      "completions/mean_terminated_length": 611.0755615234375,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 13.326530612244898,
      "grad_norm": 0.14215829968452454,
      "learning_rate": 1e-06,
      "loss": -0.0463,
      "num_tokens": 831944227.0,
      "reward": 0.7265625596046448,
      "reward_std": 0.12651436030864716,
      "rewards/verify_math_reward/mean": 0.7265625,
      "rewards/verify_math_reward/std": 0.4459724426269531,
      "step": 1426
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3535.0,
      "completions/mean_length": 1112.1663818359375,
      "completions/mean_terminated_length": 685.904296875,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 13.335860058309038,
      "grad_norm": 0.17089352011680603,
      "learning_rate": 1e-06,
      "loss": -0.0852,
      "num_tokens": 832568840.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.15852628648281097,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 1427
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0725446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3207.0,
      "completions/mean_length": 888.7422485351562,
      "completions/mean_terminated_length": 637.8736572265625,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 13.345189504373177,
      "grad_norm": 0.17929132282733917,
      "learning_rate": 1e-06,
      "loss": -0.0526,
      "num_tokens": 833187193.0,
      "reward": 0.6953125596046448,
      "reward_std": 0.15654322504997253,
      "rewards/verify_math_reward/mean": 0.6953125,
      "rewards/verify_math_reward/std": 0.4605320394039154,
      "step": 1428
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2486.0,
      "completions/mean_length": 929.239990234375,
      "completions/mean_terminated_length": 579.9938354492188,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 13.354518950437317,
      "grad_norm": 0.1622866839170456,
      "learning_rate": 1e-06,
      "loss": -0.0746,
      "num_tokens": 833737184.0,
      "reward": 0.7500000596046448,
      "reward_std": 0.1112217977643013,
      "rewards/verify_math_reward/mean": 0.75,
      "rewards/verify_math_reward/std": 0.43325456976890564,
      "step": 1429
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3710.0,
      "completions/mean_length": 965.6194458007812,
      "completions/mean_terminated_length": 594.3507690429688,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 13.363848396501458,
      "grad_norm": 0.1701904684305191,
      "learning_rate": 1e-06,
      "loss": -0.0371,
      "num_tokens": 834293323.0,
      "reward": 0.6785714626312256,
      "reward_std": 0.15390853583812714,
      "rewards/verify_math_reward/mean": 0.6785714030265808,
      "rewards/verify_math_reward/std": 0.46728572249412537,
      "step": 1430
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2721.0,
      "completions/mean_length": 885.99560546875,
      "completions/mean_terminated_length": 527.5582885742188,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 13.373177842565598,
      "grad_norm": 0.12884975969791412,
      "learning_rate": 1e-06,
      "loss": -0.0665,
      "num_tokens": 834813631.0,
      "reward": 0.7399553656578064,
      "reward_std": 0.11498472094535828,
      "rewards/verify_math_reward/mean": 0.7399553656578064,
      "rewards/verify_math_reward/std": 0.43890365958213806,
      "step": 1431
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3674.0,
      "completions/mean_length": 1021.4620971679688,
      "completions/mean_terminated_length": 626.4962158203125,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 13.382507288629737,
      "grad_norm": 0.1437826305627823,
      "learning_rate": 1e-06,
      "loss": -0.0391,
      "num_tokens": 835403077.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.12726393342018127,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 1432
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3291.0,
      "completions/mean_length": 914.6741333007812,
      "completions/mean_terminated_length": 572.5537719726562,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 13.391836734693877,
      "grad_norm": 0.17380942404270172,
      "learning_rate": 1e-06,
      "loss": -0.0476,
      "num_tokens": 835960745.0,
      "reward": 0.7198660969734192,
      "reward_std": 0.15161871910095215,
      "rewards/verify_math_reward/mean": 0.7198660969734192,
      "rewards/verify_math_reward/std": 0.44931530952453613,
      "step": 1433
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3736.0,
      "completions/mean_length": 881.3058471679688,
      "completions/mean_terminated_length": 561.80859375,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 13.401166180758018,
      "grad_norm": 0.15893049538135529,
      "learning_rate": 1e-06,
      "loss": -0.058,
      "num_tokens": 836510995.0,
      "reward": 0.738839328289032,
      "reward_std": 0.1144213080406189,
      "rewards/verify_math_reward/mean": 0.7388392686843872,
      "rewards/verify_math_reward/std": 0.439512699842453,
      "step": 1434
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3607.0,
      "completions/mean_length": 854.0067138671875,
      "completions/mean_terminated_length": 596.2096557617188,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 13.410495626822158,
      "grad_norm": 0.14369086921215057,
      "learning_rate": 1e-06,
      "loss": -0.0541,
      "num_tokens": 837085057.0,
      "reward": 0.785714328289032,
      "reward_std": 0.14083515107631683,
      "rewards/verify_math_reward/mean": 0.7857142686843872,
      "rewards/verify_math_reward/std": 0.41055506467819214,
      "step": 1435
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3175.0,
      "completions/mean_length": 951.8516235351562,
      "completions/mean_terminated_length": 600.7680053710938,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 13.419825072886297,
      "grad_norm": 0.1766524761915207,
      "learning_rate": 1e-06,
      "loss": -0.0494,
      "num_tokens": 837667964.0,
      "reward": 0.6986607313156128,
      "reward_std": 0.13075695931911469,
      "rewards/verify_math_reward/mean": 0.6986607313156128,
      "rewards/verify_math_reward/std": 0.4590960443019867,
      "step": 1436
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3731.0,
      "completions/mean_length": 959.271240234375,
      "completions/mean_terminated_length": 643.2862548828125,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 13.429154518950437,
      "grad_norm": 0.1514643430709839,
      "learning_rate": 1e-06,
      "loss": -0.0578,
      "num_tokens": 838278415.0,
      "reward": 0.7020089626312256,
      "reward_std": 0.14560948312282562,
      "rewards/verify_math_reward/mean": 0.7020089030265808,
      "rewards/verify_math_reward/std": 0.45763099193573,
      "step": 1437
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3858.0,
      "completions/mean_length": 929.825927734375,
      "completions/mean_terminated_length": 576.2828979492188,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 13.438483965014576,
      "grad_norm": 0.15670251846313477,
      "learning_rate": 1e-06,
      "loss": -0.0727,
      "num_tokens": 838834467.0,
      "reward": 0.7254464626312256,
      "reward_std": 0.1256481409072876,
      "rewards/verify_math_reward/mean": 0.7254464030265808,
      "rewards/verify_math_reward/std": 0.4465382993221283,
      "step": 1438
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3390.0,
      "completions/mean_length": 1107.852783203125,
      "completions/mean_terminated_length": 641.316162109375,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 13.447813411078718,
      "grad_norm": 0.14905303716659546,
      "learning_rate": 1e-06,
      "loss": -0.0463,
      "num_tokens": 839427023.0,
      "reward": 0.65625,
      "reward_std": 0.1370062381029129,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4752241373062134,
      "step": 1439
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3549.0,
      "completions/mean_length": 1020.1652221679688,
      "completions/mean_terminated_length": 642.4310913085938,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 13.457142857142857,
      "grad_norm": 0.1549973040819168,
      "learning_rate": 1e-06,
      "loss": -0.0441,
      "num_tokens": 840023035.0,
      "reward": 0.7343750596046448,
      "reward_std": 0.1150607094168663,
      "rewards/verify_math_reward/mean": 0.734375,
      "rewards/verify_math_reward/std": 0.44191211462020874,
      "step": 1440
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3567.0,
      "completions/mean_length": 1083.302490234375,
      "completions/mean_terminated_length": 630.8177490234375,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 13.466472303206997,
      "grad_norm": 0.1533796489238739,
      "learning_rate": 1e-06,
      "loss": -0.0441,
      "num_tokens": 840606146.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.1196403056383133,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 1441
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3679.0,
      "completions/mean_length": 1055.1429443359375,
      "completions/mean_terminated_length": 616.2962646484375,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 13.475801749271136,
      "grad_norm": 0.14904369413852692,
      "learning_rate": 1e-06,
      "loss": -0.0299,
      "num_tokens": 841175442.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.11509279161691666,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.4628615975379944,
      "step": 1442
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3493.0,
      "completions/mean_length": 964.5011596679688,
      "completions/mean_terminated_length": 606.17041015625,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 13.485131195335278,
      "grad_norm": 0.17566823959350586,
      "learning_rate": 1e-06,
      "loss": -0.0528,
      "num_tokens": 841762619.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.12223109602928162,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.4610042870044708,
      "step": 1443
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2613.0,
      "completions/mean_length": 997.9442138671875,
      "completions/mean_terminated_length": 599.9571533203125,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 13.494460641399417,
      "grad_norm": 0.16535988450050354,
      "learning_rate": 1e-06,
      "loss": -0.0717,
      "num_tokens": 842327073.0,
      "reward": 0.7354910969734192,
      "reward_std": 0.15297168493270874,
      "rewards/verify_math_reward/mean": 0.7354910969734192,
      "rewards/verify_math_reward/std": 0.44131770730018616,
      "step": 1444
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3836.0,
      "completions/mean_length": 994.22998046875,
      "completions/mean_terminated_length": 617.6696166992188,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 13.503790087463557,
      "grad_norm": 0.14937201142311096,
      "learning_rate": 1e-06,
      "loss": -0.0528,
      "num_tokens": 842911535.0,
      "reward": 0.6986607313156128,
      "reward_std": 0.12324914336204529,
      "rewards/verify_math_reward/mean": 0.6986607313156128,
      "rewards/verify_math_reward/std": 0.4590960443019867,
      "step": 1445
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3827.0,
      "completions/mean_length": 1036.6273193359375,
      "completions/mean_terminated_length": 612.9021606445312,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 13.513119533527696,
      "grad_norm": 0.1665908843278885,
      "learning_rate": 1e-06,
      "loss": -0.0476,
      "num_tokens": 843477753.0,
      "reward": 0.6484375,
      "reward_std": 0.10960599780082703,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 1446
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1272321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3296.0,
      "completions/mean_length": 1044.5413818359375,
      "completions/mean_terminated_length": 599.699462890625,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 13.522448979591836,
      "grad_norm": 0.14764945209026337,
      "learning_rate": 1e-06,
      "loss": -0.0493,
      "num_tokens": 844048662.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.11591833829879761,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 1447
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3980.0,
      "completions/mean_length": 967.3404541015625,
      "completions/mean_terminated_length": 604.9924926757812,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 13.531778425655977,
      "grad_norm": 0.12813781201839447,
      "learning_rate": 1e-06,
      "loss": -0.0178,
      "num_tokens": 844615471.0,
      "reward": 0.7176339626312256,
      "reward_std": 0.09453663229942322,
      "rewards/verify_math_reward/mean": 0.7176339030265808,
      "rewards/verify_math_reward/std": 0.4504019320011139,
      "step": 1448
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3590.0,
      "completions/mean_length": 892.5636596679688,
      "completions/mean_terminated_length": 578.501220703125,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 13.541107871720117,
      "grad_norm": 0.13819770514965057,
      "learning_rate": 1e-06,
      "loss": -0.0672,
      "num_tokens": 845177208.0,
      "reward": 0.731026828289032,
      "reward_std": 0.11858721822500229,
      "rewards/verify_math_reward/mean": 0.7310267686843872,
      "rewards/verify_math_reward/std": 0.44367367029190063,
      "step": 1449
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2395.0,
      "completions/mean_length": 965.0960083007812,
      "completions/mean_terminated_length": 584.998779296875,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 13.550437317784256,
      "grad_norm": 0.13451385498046875,
      "learning_rate": 1e-06,
      "loss": -0.0218,
      "num_tokens": 845745110.0,
      "reward": 0.6975446939468384,
      "reward_std": 0.09337659925222397,
      "rewards/verify_math_reward/mean": 0.6975446343421936,
      "rewards/verify_math_reward/std": 0.45957788825035095,
      "step": 1450
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3837.0,
      "completions/mean_length": 968.864990234375,
      "completions/mean_terminated_length": 597.9812622070312,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 13.559766763848396,
      "grad_norm": 0.1683139204978943,
      "learning_rate": 1e-06,
      "loss": -0.0743,
      "num_tokens": 846304357.0,
      "reward": 0.7243303656578064,
      "reward_std": 0.1473347544670105,
      "rewards/verify_math_reward/mean": 0.7243303656578064,
      "rewards/verify_math_reward/std": 0.4471006691455841,
      "step": 1451
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1462053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3013.0,
      "completions/mean_length": 1088.844970703125,
      "completions/mean_terminated_length": 573.8941650390625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 13.569096209912537,
      "grad_norm": 0.19807444512844086,
      "learning_rate": 1e-06,
      "loss": -0.0701,
      "num_tokens": 846826618.0,
      "reward": 0.7243303656578064,
      "reward_std": 0.14060692489147186,
      "rewards/verify_math_reward/mean": 0.7243303656578064,
      "rewards/verify_math_reward/std": 0.4471006691455841,
      "step": 1452
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3370.0,
      "completions/mean_length": 969.638427734375,
      "completions/mean_terminated_length": 585.69921875,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 13.578425655976677,
      "grad_norm": 0.16037462651729584,
      "learning_rate": 1e-06,
      "loss": -0.0825,
      "num_tokens": 847380254.0,
      "reward": 0.7176339626312256,
      "reward_std": 0.14448042213916779,
      "rewards/verify_math_reward/mean": 0.7176339030265808,
      "rewards/verify_math_reward/std": 0.4504019320011139,
      "step": 1453
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3158.0,
      "completions/mean_length": 969.98779296875,
      "completions/mean_terminated_length": 586.0914916992188,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 13.587755102040816,
      "grad_norm": 0.15379683673381805,
      "learning_rate": 1e-06,
      "loss": -0.0293,
      "num_tokens": 847941467.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.1311374455690384,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 1454
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1116071428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3250.0,
      "completions/mean_length": 987.4688110351562,
      "completions/mean_terminated_length": 596.9497680664062,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 13.597084548104956,
      "grad_norm": 0.15186743438243866,
      "learning_rate": 1e-06,
      "loss": -0.0819,
      "num_tokens": 848512911.0,
      "reward": 0.6752232313156128,
      "reward_std": 0.14402256906032562,
      "rewards/verify_math_reward/mean": 0.6752232313156128,
      "rewards/verify_math_reward/std": 0.46855294704437256,
      "step": 1455
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3168.0,
      "completions/mean_length": 1055.6082763671875,
      "completions/mean_terminated_length": 630.108154296875,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 13.606413994169095,
      "grad_norm": 0.15735474228858948,
      "learning_rate": 1e-06,
      "loss": -0.0273,
      "num_tokens": 849104552.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.13842478394508362,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 1456
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1428571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4067.0,
      "completions/mean_length": 1121.7601318359375,
      "completions/mean_terminated_length": 626.0534057617188,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 13.615743440233237,
      "grad_norm": 0.20069833099842072,
      "learning_rate": 1e-06,
      "loss": -0.0616,
      "num_tokens": 849671857.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.16506867110729218,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.47219157218933105,
      "step": 1457
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0647321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3162.0,
      "completions/mean_length": 767.9263916015625,
      "completions/mean_terminated_length": 537.5823364257812,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 13.625072886297376,
      "grad_norm": 0.1525256186723709,
      "learning_rate": 1e-06,
      "loss": -0.0295,
      "num_tokens": 850211447.0,
      "reward": 0.7555803656578064,
      "reward_std": 0.10525421798229218,
      "rewards/verify_math_reward/mean": 0.7555803656578064,
      "rewards/verify_math_reward/std": 0.42998260259628296,
      "step": 1458
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3788.0,
      "completions/mean_length": 1094.540283203125,
      "completions/mean_terminated_length": 621.44189453125,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 13.634402332361516,
      "grad_norm": 0.15941284596920013,
      "learning_rate": 1e-06,
      "loss": -0.0592,
      "num_tokens": 850788275.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.14440374076366425,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 1459
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3863.0,
      "completions/mean_length": 1075.3148193359375,
      "completions/mean_terminated_length": 656.9478759765625,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 13.643731778425655,
      "grad_norm": 0.14771853387355804,
      "learning_rate": 1e-06,
      "loss": -0.0519,
      "num_tokens": 851400117.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.14128021895885468,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.4628615975379944,
      "step": 1460
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3597.0,
      "completions/mean_length": 1057.4029541015625,
      "completions/mean_terminated_length": 658.3952026367188,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 13.653061224489797,
      "grad_norm": 0.17699874937534332,
      "learning_rate": 1e-06,
      "loss": -0.0922,
      "num_tokens": 852002078.0,
      "reward": 0.7299107313156128,
      "reward_std": 0.1521807461977005,
      "rewards/verify_math_reward/mean": 0.7299107313156128,
      "rewards/verify_math_reward/std": 0.44425368309020996,
      "step": 1461
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3692.0,
      "completions/mean_length": 1023.5156860351562,
      "completions/mean_terminated_length": 611.2582397460938,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 13.662390670553936,
      "grad_norm": 0.1677607297897339,
      "learning_rate": 1e-06,
      "loss": -0.0651,
      "num_tokens": 852576300.0,
      "reward": 0.707589328289032,
      "reward_std": 0.136213481426239,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 1462
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1517857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3914.0,
      "completions/mean_length": 1151.743408203125,
      "completions/mean_terminated_length": 624.8763427734375,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 13.671720116618076,
      "grad_norm": 0.18875309824943542,
      "learning_rate": 1e-06,
      "loss": -0.0868,
      "num_tokens": 853138646.0,
      "reward": 0.6618303656578064,
      "reward_std": 0.16078400611877441,
      "rewards/verify_math_reward/mean": 0.6618303656578064,
      "rewards/verify_math_reward/std": 0.4733508229255676,
      "step": 1463
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1428571428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3613.0,
      "completions/mean_length": 1097.15625,
      "completions/mean_terminated_length": 597.3489990234375,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 13.681049562682215,
      "grad_norm": 0.1729128062725067,
      "learning_rate": 1e-06,
      "loss": -0.0508,
      "num_tokens": 853682098.0,
      "reward": 0.6462053656578064,
      "reward_std": 0.11884935945272446,
      "rewards/verify_math_reward/mean": 0.6462053656578064,
      "rewards/verify_math_reward/std": 0.478413462638855,
      "step": 1464
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2944.0,
      "completions/mean_length": 910.21435546875,
      "completions/mean_terminated_length": 593.5902099609375,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 13.690379008746355,
      "grad_norm": 0.18528950214385986,
      "learning_rate": 1e-06,
      "loss": -0.0693,
      "num_tokens": 854262314.0,
      "reward": 0.7042410969734192,
      "reward_std": 0.1732921302318573,
      "rewards/verify_math_reward/mean": 0.7042410969734192,
      "rewards/verify_math_reward/std": 0.45663803815841675,
      "step": 1465
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3452.0,
      "completions/mean_length": 989.396240234375,
      "completions/mean_terminated_length": 563.6179809570312,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 13.699708454810496,
      "grad_norm": 0.13456867635250092,
      "learning_rate": 1e-06,
      "loss": -0.0538,
      "num_tokens": 854784813.0,
      "reward": 0.7522321939468384,
      "reward_std": 0.10498391091823578,
      "rewards/verify_math_reward/mean": 0.7522321343421936,
      "rewards/verify_math_reward/std": 0.4319573938846588,
      "step": 1466
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3645.0,
      "completions/mean_length": 1025.8125,
      "completions/mean_terminated_length": 631.405517578125,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 13.709037900874636,
      "grad_norm": 0.1496770977973938,
      "learning_rate": 1e-06,
      "loss": -0.0423,
      "num_tokens": 855370133.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.1021307036280632,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 1467
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3862.0,
      "completions/mean_length": 1059.364990234375,
      "completions/mean_terminated_length": 607.7628173828125,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 13.718367346938775,
      "grad_norm": 0.15127938985824585,
      "learning_rate": 1e-06,
      "loss": -0.0707,
      "num_tokens": 855945708.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.12640699744224548,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.4816865026950836,
      "step": 1468
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3119.0,
      "completions/mean_length": 1113.1942138671875,
      "completions/mean_terminated_length": 712.9696044921875,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 13.727696793002915,
      "grad_norm": 0.14797398447990417,
      "learning_rate": 1e-06,
      "loss": -0.0609,
      "num_tokens": 856599562.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.12527835369110107,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 1469
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3671.0,
      "completions/mean_length": 1066.50341796875,
      "completions/mean_terminated_length": 607.0167236328125,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 13.737026239067056,
      "grad_norm": 0.16321104764938354,
      "learning_rate": 1e-06,
      "loss": -0.0891,
      "num_tokens": 857151533.0,
      "reward": 0.7098214626312256,
      "reward_std": 0.1455295979976654,
      "rewards/verify_math_reward/mean": 0.7098214030265808,
      "rewards/verify_math_reward/std": 0.454098105430603,
      "step": 1470
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4055.0,
      "completions/mean_length": 1056.72998046875,
      "completions/mean_terminated_length": 586.7396850585938,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 13.746355685131196,
      "grad_norm": 0.17276650667190552,
      "learning_rate": 1e-06,
      "loss": -0.0828,
      "num_tokens": 857695179.0,
      "reward": 0.7087053656578064,
      "reward_std": 0.14974148571491241,
      "rewards/verify_math_reward/mean": 0.7087053656578064,
      "rewards/verify_math_reward/std": 0.45461276173591614,
      "step": 1471
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3638.0,
      "completions/mean_length": 1088.8538818359375,
      "completions/mean_terminated_length": 628.2998657226562,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 13.755685131195335,
      "grad_norm": 0.14320360124111176,
      "learning_rate": 1e-06,
      "loss": -0.0908,
      "num_tokens": 858282112.0,
      "reward": 0.7020089626312256,
      "reward_std": 0.13192880153656006,
      "rewards/verify_math_reward/mean": 0.7020089030265808,
      "rewards/verify_math_reward/std": 0.45763102173805237,
      "step": 1472
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1462053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3606.0,
      "completions/mean_length": 1129.376220703125,
      "completions/mean_terminated_length": 621.3660278320312,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 13.765014577259475,
      "grad_norm": 0.18304169178009033,
      "learning_rate": 1e-06,
      "loss": -0.0561,
      "num_tokens": 858841449.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.14327581226825714,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 1473
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1607142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4082.0,
      "completions/mean_length": 1172.068115234375,
      "completions/mean_terminated_length": 612.1661987304688,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 13.774344023323614,
      "grad_norm": 0.16333621740341187,
      "learning_rate": 1e-06,
      "loss": -0.0835,
      "num_tokens": 859399702.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.1410187929868698,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900800228119,
      "step": 1474
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2577.0,
      "completions/mean_length": 1155.44873046875,
      "completions/mean_terminated_length": 674.2675170898438,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 13.783673469387756,
      "grad_norm": 0.16109581291675568,
      "learning_rate": 1e-06,
      "loss": -0.0839,
      "num_tokens": 860012632.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.15793219208717346,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159182548523,
      "step": 1475
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3238.0,
      "completions/mean_length": 882.7969360351562,
      "completions/mean_terminated_length": 554.7576904296875,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 13.793002915451895,
      "grad_norm": 0.1812966763973236,
      "learning_rate": 1e-06,
      "loss": -0.0665,
      "num_tokens": 860560178.0,
      "reward": 0.7455357313156128,
      "reward_std": 0.13947714865207672,
      "rewards/verify_math_reward/mean": 0.7455357313156128,
      "rewards/verify_math_reward/std": 0.4358029067516327,
      "step": 1476
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1517857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3735.0,
      "completions/mean_length": 1202.2154541015625,
      "completions/mean_terminated_length": 684.3803100585938,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 13.802332361516035,
      "grad_norm": 0.14447076618671417,
      "learning_rate": 1e-06,
      "loss": -0.0794,
      "num_tokens": 861175619.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.1272311508655548,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 1477
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3861.0,
      "completions/mean_length": 1061.063720703125,
      "completions/mean_terminated_length": 605.23876953125,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 13.811661807580174,
      "grad_norm": 0.16366831958293915,
      "learning_rate": 1e-06,
      "loss": -0.0854,
      "num_tokens": 861725540.0,
      "reward": 0.7399553656578064,
      "reward_std": 0.1310618817806244,
      "rewards/verify_math_reward/mean": 0.7399553656578064,
      "rewards/verify_math_reward/std": 0.43890365958213806,
      "step": 1478
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1462053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2612.0,
      "completions/mean_length": 1131.1060791015625,
      "completions/mean_terminated_length": 623.3921508789062,
      "completions/min_length": 180.0,
      "completions/min_terminated_length": 180.0,
      "epoch": 13.820991253644316,
      "grad_norm": 0.16854460537433624,
      "learning_rate": 1e-06,
      "loss": -0.0516,
      "num_tokens": 862289971.0,
      "reward": 0.6417410969734192,
      "reward_std": 0.13921934366226196,
      "rewards/verify_math_reward/mean": 0.6417410969734192,
      "rewards/verify_math_reward/std": 0.47975659370422363,
      "step": 1479
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2919.0,
      "completions/mean_length": 976.02685546875,
      "completions/mean_terminated_length": 575.2241821289062,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 13.830320699708455,
      "grad_norm": 0.1460832953453064,
      "learning_rate": 1e-06,
      "loss": -0.055,
      "num_tokens": 862831107.0,
      "reward": 0.7566964626312256,
      "reward_std": 0.11126276105642319,
      "rewards/verify_math_reward/mean": 0.7566964030265808,
      "rewards/verify_math_reward/std": 0.4293164908885956,
      "step": 1480
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3637.0,
      "completions/mean_length": 972.7422485351562,
      "completions/mean_terminated_length": 628.293701171875,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 13.839650145772595,
      "grad_norm": 0.1542253941297531,
      "learning_rate": 1e-06,
      "loss": -0.0643,
      "num_tokens": 863420932.0,
      "reward": 0.7399553656578064,
      "reward_std": 0.1373753398656845,
      "rewards/verify_math_reward/mean": 0.7399553656578064,
      "rewards/verify_math_reward/std": 0.43890365958213806,
      "step": 1481
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.2243303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3626.0,
      "completions/mean_length": 1476.0592041015625,
      "completions/mean_terminated_length": 718.349609375,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 13.848979591836734,
      "grad_norm": 0.1680951565504074,
      "learning_rate": 1e-06,
      "loss": -0.1322,
      "num_tokens": 864019969.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.14060552418231964,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 1482
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3645.0,
      "completions/mean_length": 1083.993408203125,
      "completions/mean_terminated_length": 627.1593627929688,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 13.858309037900874,
      "grad_norm": 0.17295648157596588,
      "learning_rate": 1e-06,
      "loss": -0.0516,
      "num_tokens": 864600995.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.14128021895885468,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 1483
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3003.0,
      "completions/mean_length": 989.58935546875,
      "completions/mean_terminated_length": 621.1635131835938,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 13.867638483965015,
      "grad_norm": 0.14881660044193268,
      "learning_rate": 1e-06,
      "loss": -0.0691,
      "num_tokens": 865188115.0,
      "reward": 0.7042410969734192,
      "reward_std": 0.11486707627773285,
      "rewards/verify_math_reward/mean": 0.7042410969734192,
      "rewards/verify_math_reward/std": 0.45663803815841675,
      "step": 1484
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3379.0,
      "completions/mean_length": 1001.87841796875,
      "completions/mean_terminated_length": 591.1542358398438,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 13.876967930029155,
      "grad_norm": 0.1812649667263031,
      "learning_rate": 1e-06,
      "loss": -0.0522,
      "num_tokens": 865737118.0,
      "reward": 0.7020089626312256,
      "reward_std": 0.13775509595870972,
      "rewards/verify_math_reward/mean": 0.7020089030265808,
      "rewards/verify_math_reward/std": 0.45763099193573,
      "step": 1485
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1796875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3854.0,
      "completions/mean_length": 1271.9398193359375,
      "completions/mean_terminated_length": 653.3360595703125,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 13.886297376093294,
      "grad_norm": 0.16636329889297485,
      "learning_rate": 1e-06,
      "loss": -0.0691,
      "num_tokens": 866304232.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.12260904908180237,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 1486
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1238839285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3936.0,
      "completions/mean_length": 1036.560302734375,
      "completions/mean_terminated_length": 603.9515991210938,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 13.895626822157434,
      "grad_norm": 0.1734764277935028,
      "learning_rate": 1e-06,
      "loss": -0.0529,
      "num_tokens": 866867702.0,
      "reward": 0.6808035969734192,
      "reward_std": 0.1443263590335846,
      "rewards/verify_math_reward/mean": 0.6808035969734192,
      "rewards/verify_math_reward/std": 0.4664256274700165,
      "step": 1487
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3216.0,
      "completions/mean_length": 960.5692138671875,
      "completions/mean_terminated_length": 597.4370727539062,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 13.904956268221575,
      "grad_norm": 0.12487823516130447,
      "learning_rate": 1e-06,
      "loss": -0.0207,
      "num_tokens": 867425180.0,
      "reward": 0.7433035969734192,
      "reward_std": 0.0913478210568428,
      "rewards/verify_math_reward/mean": 0.7433035969734192,
      "rewards/verify_math_reward/std": 0.43705442547798157,
      "step": 1488
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3535.0,
      "completions/mean_length": 977.3616333007812,
      "completions/mean_terminated_length": 611.83544921875,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 13.914285714285715,
      "grad_norm": 0.17821316421031952,
      "learning_rate": 1e-06,
      "loss": -0.0811,
      "num_tokens": 868010784.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.1578880101442337,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 1489
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2722.0,
      "completions/mean_length": 1081.360595703125,
      "completions/mean_terminated_length": 597.1437377929688,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 13.923615160349854,
      "grad_norm": 0.17223955690860748,
      "learning_rate": 1e-06,
      "loss": -0.0922,
      "num_tokens": 868560179.0,
      "reward": 0.7142857313156128,
      "reward_std": 0.13771234452724457,
      "rewards/verify_math_reward/mean": 0.7142857313156128,
      "rewards/verify_math_reward/std": 0.4520062506198883,
      "step": 1490
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1339285714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3865.0,
      "completions/mean_length": 1132.7254638671875,
      "completions/mean_terminated_length": 674.4871215820312,
      "completions/min_length": 183.0,
      "completions/min_terminated_length": 183.0,
      "epoch": 13.932944606413994,
      "grad_norm": 0.16000701487064362,
      "learning_rate": 1e-06,
      "loss": -0.0865,
      "num_tokens": 869170829.0,
      "reward": 0.640625,
      "reward_std": 0.15995843708515167,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 1491
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1450892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2537.0,
      "completions/mean_length": 1114.935302734375,
      "completions/mean_terminated_length": 609.0104370117188,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 13.942274052478133,
      "grad_norm": 0.172801673412323,
      "learning_rate": 1e-06,
      "loss": -0.1032,
      "num_tokens": 869726499.0,
      "reward": 0.6484375,
      "reward_std": 0.155143141746521,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 1492
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0881696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3448.0,
      "completions/mean_length": 908.5167846679688,
      "completions/mean_terminated_length": 600.3023071289062,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 13.951603498542275,
      "grad_norm": 0.13795799016952515,
      "learning_rate": 1e-06,
      "loss": -0.059,
      "num_tokens": 870304250.0,
      "reward": 0.7265625596046448,
      "reward_std": 0.10551635921001434,
      "rewards/verify_math_reward/mean": 0.7265625,
      "rewards/verify_math_reward/std": 0.4459724426269531,
      "step": 1493
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.15625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3150.0,
      "completions/mean_length": 1200.2467041015625,
      "completions/mean_terminated_length": 663.9960327148438,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 13.960932944606414,
      "grad_norm": 0.14509567618370056,
      "learning_rate": 1e-06,
      "loss": -0.0742,
      "num_tokens": 870902495.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.11727311462163925,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 1494
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1685267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3320.0,
      "completions/mean_length": 1239.3248291015625,
      "completions/mean_terminated_length": 660.3208618164062,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 13.970262390670554,
      "grad_norm": 0.182939812541008,
      "learning_rate": 1e-06,
      "loss": -0.0798,
      "num_tokens": 871475522.0,
      "reward": 0.625,
      "reward_std": 0.13685287535190582,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1495
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3149.0,
      "completions/mean_length": 982.6741333007812,
      "completions/mean_terminated_length": 569.4007568359375,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 13.979591836734693,
      "grad_norm": 0.18029490113258362,
      "learning_rate": 1e-06,
      "loss": -0.0241,
      "num_tokens": 872011118.0,
      "reward": 0.715401828289032,
      "reward_std": 0.12076866626739502,
      "rewards/verify_math_reward/mean": 0.7154017686843872,
      "rewards/verify_math_reward/std": 0.4514748752117157,
      "step": 1496
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3952.0,
      "completions/mean_length": 1170.0301513671875,
      "completions/mean_terminated_length": 721.9086303710938,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 13.988921282798835,
      "grad_norm": 0.1886465698480606,
      "learning_rate": 1e-06,
      "loss": -0.0671,
      "num_tokens": 872665089.0,
      "reward": 0.640625,
      "reward_std": 0.18220455944538116,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 1497
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.16193181818181823,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3782.0,
      "completions/mean_length": 1186.0994873046875,
      "completions/mean_terminated_length": 623.8474731445312,
      "completions/min_length": 195.0,
      "completions/min_terminated_length": 195.0,
      "epoch": 13.998250728862974,
      "grad_norm": 0.13559775054454803,
      "learning_rate": 1e-06,
      "loss": -0.075,
      "num_tokens": 873257060.0,
      "reward": 0.7254464626312256,
      "reward_std": 0.12050722539424896,
      "rewards/verify_math_reward/mean": 0.7254464030265808,
      "rewards/verify_math_reward/std": 0.4465382993221283,
      "step": 1498
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1618303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3601.0,
      "completions/mean_length": 1204.75341796875,
      "completions/mean_terminated_length": 646.5232543945312,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 14.00932944606414,
      "grad_norm": 0.16991333663463593,
      "learning_rate": 1e-06,
      "loss": -0.0815,
      "num_tokens": 873830511.0,
      "reward": 0.6506696939468384,
      "reward_std": 0.15500116348266602,
      "rewards/verify_math_reward/mean": 0.6506696343421936,
      "rewards/verify_math_reward/std": 0.47702476382255554,
      "step": 1499
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3950.0,
      "completions/mean_length": 1000.825927734375,
      "completions/mean_terminated_length": 638.0499267578125,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 14.018658892128279,
      "grad_norm": 0.15517744421958923,
      "learning_rate": 1e-06,
      "loss": -0.0723,
      "num_tokens": 874437915.0,
      "reward": 0.7399553656578064,
      "reward_std": 0.15308158099651337,
      "rewards/verify_math_reward/mean": 0.7399553656578064,
      "rewards/verify_math_reward/std": 0.43890365958213806,
      "step": 1500
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3814.0,
      "completions/mean_length": 1165.8895263671875,
      "completions/mean_terminated_length": 686.4168701171875,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 14.02798833819242,
      "grad_norm": 0.1750682294368744,
      "learning_rate": 1e-06,
      "loss": -0.0783,
      "num_tokens": 875064984.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.14733335375785828,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.48841196298599243,
      "step": 1501
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3283.0,
      "completions/mean_length": 1093.552490234375,
      "completions/mean_terminated_length": 686.3764038085938,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 14.03731778425656,
      "grad_norm": 0.18799026310443878,
      "learning_rate": 1e-06,
      "loss": -0.0757,
      "num_tokens": 875687663.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.18655773997306824,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45613667368888855,
      "step": 1502
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0725446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3575.0,
      "completions/mean_length": 905.5491333007812,
      "completions/mean_terminated_length": 655.9951782226562,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 14.0466472303207,
      "grad_norm": 0.1436869353055954,
      "learning_rate": 1e-06,
      "loss": -0.0244,
      "num_tokens": 876322875.0,
      "reward": 0.7433035969734192,
      "reward_std": 0.1352698653936386,
      "rewards/verify_math_reward/mean": 0.7433035969734192,
      "rewards/verify_math_reward/std": 0.43705442547798157,
      "step": 1503
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3835.0,
      "completions/mean_length": 970.1395263671875,
      "completions/mean_terminated_length": 638.258056640625,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 14.055976676384839,
      "grad_norm": 0.16448047757148743,
      "learning_rate": 1e-06,
      "loss": -0.0412,
      "num_tokens": 876925024.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.14556489884853363,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 1504
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3696.0,
      "completions/mean_length": 993.7623291015625,
      "completions/mean_terminated_length": 664.388916015625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 14.06530612244898,
      "grad_norm": 0.1584680825471878,
      "learning_rate": 1e-06,
      "loss": -0.0469,
      "num_tokens": 877550619.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.12084353715181351,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.46466848254203796,
      "step": 1505
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3945.0,
      "completions/mean_length": 1075.9654541015625,
      "completions/mean_terminated_length": 617.9138793945312,
      "completions/min_length": 200.0,
      "completions/min_terminated_length": 200.0,
      "epoch": 14.07463556851312,
      "grad_norm": 0.15366147458553314,
      "learning_rate": 1e-06,
      "loss": -0.0471,
      "num_tokens": 878126124.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.10081617534160614,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 1506
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1450892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3310.0,
      "completions/mean_length": 1155.24560546875,
      "completions/mean_terminated_length": 656.161865234375,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 14.08396501457726,
      "grad_norm": 0.20653975009918213,
      "learning_rate": 1e-06,
      "loss": -0.0896,
      "num_tokens": 878716920.0,
      "reward": 0.699776828289032,
      "reward_std": 0.1604897677898407,
      "rewards/verify_math_reward/mean": 0.6997767686843872,
      "rewards/verify_math_reward/std": 0.4586109220981598,
      "step": 1507
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3942.0,
      "completions/mean_length": 1037.23779296875,
      "completions/mean_terminated_length": 609.1666870117188,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 14.093294460641399,
      "grad_norm": 0.17542661726474762,
      "learning_rate": 1e-06,
      "loss": -0.0817,
      "num_tokens": 879283629.0,
      "reward": 0.7254464626312256,
      "reward_std": 0.11956332623958588,
      "rewards/verify_math_reward/mean": 0.7254464030265808,
      "rewards/verify_math_reward/std": 0.4465382993221283,
      "step": 1508
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3865.0,
      "completions/mean_length": 1094.8460693359375,
      "completions/mean_terminated_length": 635.2097778320312,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 14.102623906705539,
      "grad_norm": 0.15363480150699615,
      "learning_rate": 1e-06,
      "loss": -0.0577,
      "num_tokens": 879868619.0,
      "reward": 0.723214328289032,
      "reward_std": 0.12283728271722794,
      "rewards/verify_math_reward/mean": 0.7232142686843872,
      "rewards/verify_math_reward/std": 0.44765952229499817,
      "step": 1509
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4085.0,
      "completions/mean_length": 952.7857666015625,
      "completions/mean_terminated_length": 606.1362915039062,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 14.11195335276968,
      "grad_norm": 0.1422143131494522,
      "learning_rate": 1e-06,
      "loss": -0.0893,
      "num_tokens": 880440003.0,
      "reward": 0.7566964626312256,
      "reward_std": 0.13737492263317108,
      "rewards/verify_math_reward/mean": 0.7566964030265808,
      "rewards/verify_math_reward/std": 0.4293164908885956,
      "step": 1510
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1395089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4035.0,
      "completions/mean_length": 1175.766845703125,
      "completions/mean_terminated_length": 702.3177490234375,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 14.12128279883382,
      "grad_norm": 0.16948352754116058,
      "learning_rate": 1e-06,
      "loss": -0.0889,
      "num_tokens": 881065042.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.14553099870681763,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 1511
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0770089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3901.0,
      "completions/mean_length": 874.0279541015625,
      "completions/mean_terminated_length": 605.20556640625,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 14.130612244897959,
      "grad_norm": 0.12992741167545319,
      "learning_rate": 1e-06,
      "loss": -0.0284,
      "num_tokens": 881645867.0,
      "reward": 0.770089328289032,
      "reward_std": 0.09897328168153763,
      "rewards/verify_math_reward/mean": 0.7700892686843872,
      "rewards/verify_math_reward/std": 0.42101022601127625,
      "step": 1512
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2722.0,
      "completions/mean_length": 982.9777221679688,
      "completions/mean_terminated_length": 635.3697509765625,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 14.139941690962099,
      "grad_norm": 0.15940600633621216,
      "learning_rate": 1e-06,
      "loss": -0.0724,
      "num_tokens": 882251823.0,
      "reward": 0.6707589626312256,
      "reward_std": 0.1349353790283203,
      "rewards/verify_math_reward/mean": 0.6707589030265808,
      "rewards/verify_math_reward/std": 0.4702001214027405,
      "step": 1513
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3935.0,
      "completions/mean_length": 1167.6239013671875,
      "completions/mean_terminated_length": 684.00390625,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 14.14927113702624,
      "grad_norm": 0.13552004098892212,
      "learning_rate": 1e-06,
      "loss": -0.0607,
      "num_tokens": 882871990.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.11257727444171906,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.462861567735672,
      "step": 1514
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3499.0,
      "completions/mean_length": 1069.2054443359375,
      "completions/mean_terminated_length": 722.855712890625,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 14.15860058309038,
      "grad_norm": 0.14533638954162598,
      "learning_rate": 1e-06,
      "loss": -0.055,
      "num_tokens": 883543606.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.12610459327697754,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1515
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3376.0,
      "completions/mean_length": 998.6563110351562,
      "completions/mean_terminated_length": 618.2807006835938,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 14.167930029154519,
      "grad_norm": 0.1805475354194641,
      "learning_rate": 1e-06,
      "loss": -0.0808,
      "num_tokens": 884126258.0,
      "reward": 0.7455357313156128,
      "reward_std": 0.11415030062198639,
      "rewards/verify_math_reward/mean": 0.7455357313156128,
      "rewards/verify_math_reward/std": 0.4358029067516327,
      "step": 1516
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2965.0,
      "completions/mean_length": 895.5413208007812,
      "completions/mean_terminated_length": 624.3159790039062,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 14.177259475218658,
      "grad_norm": 0.16091904044151306,
      "learning_rate": 1e-06,
      "loss": -0.0626,
      "num_tokens": 884730135.0,
      "reward": 0.7444196939468384,
      "reward_std": 0.14353542029857635,
      "rewards/verify_math_reward/mean": 0.7444196343421936,
      "rewards/verify_math_reward/std": 0.43643051385879517,
      "step": 1517
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1316964285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3298.0,
      "completions/mean_length": 1129.2210693359375,
      "completions/mean_terminated_length": 679.2467651367188,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 14.186588921282798,
      "grad_norm": 0.14764942228794098,
      "learning_rate": 1e-06,
      "loss": -0.0643,
      "num_tokens": 885344029.0,
      "reward": 0.6629464626312256,
      "reward_std": 0.12869539856910706,
      "rewards/verify_math_reward/mean": 0.6629464030265808,
      "rewards/verify_math_reward/std": 0.47296738624572754,
      "step": 1518
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1506696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3612.0,
      "completions/mean_length": 1137.0670166015625,
      "completions/mean_terminated_length": 612.1576538085938,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 14.19591836734694,
      "grad_norm": 0.1579299420118332,
      "learning_rate": 1e-06,
      "loss": -0.067,
      "num_tokens": 885902873.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.13421790301799774,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 1519
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0948660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2083.0,
      "completions/mean_length": 909.7801513671875,
      "completions/mean_terminated_length": 575.8359985351562,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 14.205247813411079,
      "grad_norm": 0.17138025164604187,
      "learning_rate": 1e-06,
      "loss": -0.0638,
      "num_tokens": 886459188.0,
      "reward": 0.7555803656578064,
      "reward_std": 0.1418115496635437,
      "rewards/verify_math_reward/mean": 0.7555803656578064,
      "rewards/verify_math_reward/std": 0.42998260259628296,
      "step": 1520
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2619.0,
      "completions/mean_length": 1050.2913818359375,
      "completions/mean_terminated_length": 615.1900634765625,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 14.214577259475218,
      "grad_norm": 0.16122666001319885,
      "learning_rate": 1e-06,
      "loss": -0.088,
      "num_tokens": 887026833.0,
      "reward": 0.7008928656578064,
      "reward_std": 0.14591076970100403,
      "rewards/verify_math_reward/mean": 0.7008928656578064,
      "rewards/verify_math_reward/std": 0.4581226110458374,
      "step": 1521
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3673.0,
      "completions/mean_length": 1079.5491943359375,
      "completions/mean_terminated_length": 674.8101196289062,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 14.223906705539358,
      "grad_norm": 0.1701480895280838,
      "learning_rate": 1e-06,
      "loss": -0.0556,
      "num_tokens": 887641469.0,
      "reward": 0.7098214626312256,
      "reward_std": 0.14504244923591614,
      "rewards/verify_math_reward/mean": 0.7098214030265808,
      "rewards/verify_math_reward/std": 0.454098105430603,
      "step": 1522
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3045.0,
      "completions/mean_length": 1110.8270263671875,
      "completions/mean_terminated_length": 684.3737182617188,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 14.2332361516035,
      "grad_norm": 0.1491316556930542,
      "learning_rate": 1e-06,
      "loss": -0.0637,
      "num_tokens": 888266314.0,
      "reward": 0.6674107313156128,
      "reward_std": 0.12756884098052979,
      "rewards/verify_math_reward/mean": 0.6674107313156128,
      "rewards/verify_math_reward/std": 0.47140392661094666,
      "step": 1523
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1194196428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4047.0,
      "completions/mean_length": 1054.857177734375,
      "completions/mean_terminated_length": 642.4334716796875,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 14.242565597667639,
      "grad_norm": 0.14813874661922455,
      "learning_rate": 1e-06,
      "loss": -0.05,
      "num_tokens": 888861650.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.13451918959617615,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.4610042870044708,
      "step": 1524
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1171875,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3804.0,
      "completions/mean_length": 1051.654052734375,
      "completions/mean_terminated_length": 647.5372924804688,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 14.251895043731778,
      "grad_norm": 0.15813909471035004,
      "learning_rate": 1e-06,
      "loss": -0.0418,
      "num_tokens": 889474732.0,
      "reward": 0.7209821939468384,
      "reward_std": 0.11509419232606888,
      "rewards/verify_math_reward/mean": 0.7209821343421936,
      "rewards/verify_math_reward/std": 0.448766827583313,
      "step": 1525
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3036.0,
      "completions/mean_length": 962.4832763671875,
      "completions/mean_terminated_length": 612.5868530273438,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 14.261224489795918,
      "grad_norm": 0.16052603721618652,
      "learning_rate": 1e-06,
      "loss": -0.0601,
      "num_tokens": 890058717.0,
      "reward": 0.707589328289032,
      "reward_std": 0.12787306308746338,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 1526
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3720.0,
      "completions/mean_length": 1040.884033203125,
      "completions/mean_terminated_length": 669.9874877929688,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 14.270553935860057,
      "grad_norm": 0.1593150645494461,
      "learning_rate": 1e-06,
      "loss": -0.0609,
      "num_tokens": 890691853.0,
      "reward": 0.707589328289032,
      "reward_std": 0.15379583835601807,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 1527
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.2131696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3330.0,
      "completions/mean_length": 1411.5001220703125,
      "completions/mean_terminated_length": 684.2098999023438,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 14.279883381924199,
      "grad_norm": 0.17011617124080658,
      "learning_rate": 1e-06,
      "loss": -0.0652,
      "num_tokens": 891266933.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.13072487711906433,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182766675949097,
      "step": 1528
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1484375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3369.0,
      "completions/mean_length": 1188.8638916015625,
      "completions/mean_terminated_length": 682.1153564453125,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 14.289212827988338,
      "grad_norm": 0.15505503118038177,
      "learning_rate": 1e-06,
      "loss": -0.103,
      "num_tokens": 891868107.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.1557818502187729,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.4689692556858063,
      "step": 1529
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0859375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3279.0,
      "completions/mean_length": 896.1663208007812,
      "completions/mean_terminated_length": 595.3272705078125,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 14.298542274052478,
      "grad_norm": 0.16419577598571777,
      "learning_rate": 1e-06,
      "loss": -0.0766,
      "num_tokens": 892428248.0,
      "reward": 0.7968750596046448,
      "reward_std": 0.1342952847480774,
      "rewards/verify_math_reward/mean": 0.796875,
      "rewards/verify_math_reward/std": 0.40254947543144226,
      "step": 1530
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3749.0,
      "completions/mean_length": 1001.2266235351562,
      "completions/mean_terminated_length": 651.382568359375,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 14.307871720116617,
      "grad_norm": 0.17623595893383026,
      "learning_rate": 1e-06,
      "loss": -0.0362,
      "num_tokens": 893040115.0,
      "reward": 0.7321428656578064,
      "reward_std": 0.13906781375408173,
      "rewards/verify_math_reward/mean": 0.7321428656578064,
      "rewards/verify_math_reward/std": 0.4430900514125824,
      "step": 1531
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.09375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3595.0,
      "completions/mean_length": 979.8917846679688,
      "completions/mean_terminated_length": 657.5357055664062,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 14.317201166180759,
      "grad_norm": 0.1409258395433426,
      "learning_rate": 1e-06,
      "loss": -0.0725,
      "num_tokens": 893667378.0,
      "reward": 0.7109375596046448,
      "reward_std": 0.12076614797115326,
      "rewards/verify_math_reward/mean": 0.7109375,
      "rewards/verify_math_reward/std": 0.45358020067214966,
      "step": 1532
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4027.0,
      "completions/mean_length": 937.536865234375,
      "completions/mean_terminated_length": 623.6282348632812,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 14.326530612244898,
      "grad_norm": 0.14756691455841064,
      "learning_rate": 1e-06,
      "loss": -0.0823,
      "num_tokens": 894255699.0,
      "reward": 0.6941964626312256,
      "reward_std": 0.13955524563789368,
      "rewards/verify_math_reward/mean": 0.6941964030265808,
      "rewards/verify_math_reward/std": 0.4610042870044708,
      "step": 1533
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3925.0,
      "completions/mean_length": 1114.8973388671875,
      "completions/mean_terminated_length": 702.0126953125,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 14.335860058309038,
      "grad_norm": 0.1667661964893341,
      "learning_rate": 1e-06,
      "loss": -0.0709,
      "num_tokens": 894896527.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.16645805537700653,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.46422144770622253,
      "step": 1534
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1395089285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3012.0,
      "completions/mean_length": 1143.118408203125,
      "completions/mean_terminated_length": 664.3761596679688,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 14.345189504373177,
      "grad_norm": 0.1445925533771515,
      "learning_rate": 1e-06,
      "loss": -0.0445,
      "num_tokens": 895499881.0,
      "reward": 0.6796875596046448,
      "reward_std": 0.13151581585407257,
      "rewards/verify_math_reward/mean": 0.6796875,
      "rewards/verify_math_reward/std": 0.4668572247028351,
      "step": 1535
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3562.0,
      "completions/mean_length": 999.8381958007812,
      "completions/mean_terminated_length": 628.2987060546875,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 14.354518950437317,
      "grad_norm": 0.14818841218948364,
      "learning_rate": 1e-06,
      "loss": -0.0966,
      "num_tokens": 896085456.0,
      "reward": 0.7500000596046448,
      "reward_std": 0.15082639455795288,
      "rewards/verify_math_reward/mean": 0.75,
      "rewards/verify_math_reward/std": 0.43325456976890564,
      "step": 1536
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3640.0,
      "completions/mean_length": 1057.69873046875,
      "completions/mean_terminated_length": 693.1024780273438,
      "completions/min_length": 190.0,
      "completions/min_terminated_length": 190.0,
      "epoch": 14.363848396501458,
      "grad_norm": 0.15976668894290924,
      "learning_rate": 1e-06,
      "loss": -0.045,
      "num_tokens": 896739170.0,
      "reward": 0.6495535969734192,
      "reward_std": 0.13305744528770447,
      "rewards/verify_math_reward/mean": 0.6495535969734192,
      "rewards/verify_math_reward/std": 0.477376252412796,
      "step": 1537
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3771.0,
      "completions/mean_length": 1046.654052734375,
      "completions/mean_terminated_length": 637.5012817382812,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 14.373177842565598,
      "grad_norm": 0.16981826722621918,
      "learning_rate": 1e-06,
      "loss": -0.086,
      "num_tokens": 897323244.0,
      "reward": 0.699776828289032,
      "reward_std": 0.13842660188674927,
      "rewards/verify_math_reward/mean": 0.6997767686843872,
      "rewards/verify_math_reward/std": 0.4586109220981598,
      "step": 1538
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1216517857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4023.0,
      "completions/mean_length": 1058.48779296875,
      "completions/mean_terminated_length": 637.7903442382812,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 14.382507288629737,
      "grad_norm": 0.1528918594121933,
      "learning_rate": 1e-06,
      "loss": -0.0514,
      "num_tokens": 897909761.0,
      "reward": 0.7220982313156128,
      "reward_std": 0.11137335002422333,
      "rewards/verify_math_reward/mean": 0.7220982313156128,
      "rewards/verify_math_reward/std": 0.44821488857269287,
      "step": 1539
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0993303571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3565.0,
      "completions/mean_length": 916.654052734375,
      "completions/mean_terminated_length": 566.0198364257812,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 14.391836734693877,
      "grad_norm": 0.17690247297286987,
      "learning_rate": 1e-06,
      "loss": -0.0686,
      "num_tokens": 898449139.0,
      "reward": 0.7611607313156128,
      "reward_std": 0.14083515107631683,
      "rewards/verify_math_reward/mean": 0.7611607313156128,
      "rewards/verify_math_reward/std": 0.4266124963760376,
      "step": 1540
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1071428571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2937.0,
      "completions/mean_length": 1058.8070068359375,
      "completions/mean_terminated_length": 694.34375,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 14.401166180758018,
      "grad_norm": 0.15754111111164093,
      "learning_rate": 1e-06,
      "loss": -0.0539,
      "num_tokens": 899099566.0,
      "reward": 0.6573660969734192,
      "reward_std": 0.15713688731193542,
      "rewards/verify_math_reward/mean": 0.6573660969734192,
      "rewards/verify_math_reward/std": 0.47485533356666565,
      "step": 1541
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3921.0,
      "completions/mean_length": 992.7310791015625,
      "completions/mean_terminated_length": 633.3237915039062,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 14.410495626822158,
      "grad_norm": 0.17619124054908752,
      "learning_rate": 1e-06,
      "loss": -0.0619,
      "num_tokens": 899688805.0,
      "reward": 0.7008928656578064,
      "reward_std": 0.1445143222808838,
      "rewards/verify_math_reward/mean": 0.7008928656578064,
      "rewards/verify_math_reward/std": 0.4581226110458374,
      "step": 1542
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3473.0,
      "completions/mean_length": 924.7980346679688,
      "completions/mean_terminated_length": 672.630126953125,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 14.419825072886297,
      "grad_norm": 0.14695429801940918,
      "learning_rate": 1e-06,
      "loss": -0.0651,
      "num_tokens": 900328752.0,
      "reward": 0.7332589626312256,
      "reward_std": 0.13801473379135132,
      "rewards/verify_math_reward/mean": 0.7332589030265808,
      "rewards/verify_math_reward/std": 0.4425029158592224,
      "step": 1543
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4005.0,
      "completions/mean_length": 1109.009033203125,
      "completions/mean_terminated_length": 638.1912231445312,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 14.429154518950437,
      "grad_norm": 0.17023196816444397,
      "learning_rate": 1e-06,
      "loss": -0.0664,
      "num_tokens": 900924840.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.1418108493089676,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 1544
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1383928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3260.0,
      "completions/mean_length": 1106.719970703125,
      "completions/mean_terminated_length": 626.576416015625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 14.438483965014576,
      "grad_norm": 0.1465475857257843,
      "learning_rate": 1e-06,
      "loss": -0.0838,
      "num_tokens": 901497621.0,
      "reward": 0.6886160969734192,
      "reward_std": 0.13470646739006042,
      "rewards/verify_math_reward/mean": 0.6886160969734192,
      "rewards/verify_math_reward/std": 0.46331802010536194,
      "step": 1545
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1328125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3884.0,
      "completions/mean_length": 1126.852783203125,
      "completions/mean_terminated_length": 672.118408203125,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 14.447813411078718,
      "grad_norm": 0.1621505469083786,
      "learning_rate": 1e-06,
      "loss": -0.0399,
      "num_tokens": 902102737.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.15003502368927002,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 1546
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3308.0,
      "completions/mean_length": 964.8482666015625,
      "completions/mean_terminated_length": 602.211669921875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 14.457142857142857,
      "grad_norm": 0.15574316680431366,
      "learning_rate": 1e-06,
      "loss": -0.0396,
      "num_tokens": 902680385.0,
      "reward": 0.6595982313156128,
      "reward_std": 0.11707949638366699,
      "rewards/verify_math_reward/mean": 0.6595982313156128,
      "rewards/verify_math_reward/std": 0.4741089344024658,
      "step": 1547
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3254.0,
      "completions/mean_length": 1123.61279296875,
      "completions/mean_terminated_length": 659.5368041992188,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 14.466472303206997,
      "grad_norm": 0.14102943241596222,
      "learning_rate": 1e-06,
      "loss": -0.0413,
      "num_tokens": 903293982.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.11742466688156128,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 1548
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3676.0,
      "completions/mean_length": 997.1295166015625,
      "completions/mean_terminated_length": 629.5979614257812,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 14.475801749271136,
      "grad_norm": 0.18004238605499268,
      "learning_rate": 1e-06,
      "loss": -0.0886,
      "num_tokens": 903879570.0,
      "reward": 0.7131696939468384,
      "reward_std": 0.14571714401245117,
      "rewards/verify_math_reward/mean": 0.7131696343421936,
      "rewards/verify_math_reward/std": 0.4525342583656311,
      "step": 1549
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1037946428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3689.0,
      "completions/mean_length": 988.8761596679688,
      "completions/mean_terminated_length": 629.0223999023438,
      "completions/min_length": 204.0,
      "completions/min_terminated_length": 204.0,
      "epoch": 14.485131195335278,
      "grad_norm": 0.15630412101745605,
      "learning_rate": 1e-06,
      "loss": -0.0411,
      "num_tokens": 904483755.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.12430264800786972,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 1550
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3797.0,
      "completions/mean_length": 998.4922485351562,
      "completions/mean_terminated_length": 635.4426879882812,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 14.494460641399417,
      "grad_norm": 0.14220628142356873,
      "learning_rate": 1e-06,
      "loss": -0.0558,
      "num_tokens": 905078524.0,
      "reward": 0.707589328289032,
      "reward_std": 0.12910866737365723,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 1551
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.140625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3811.0,
      "completions/mean_length": 1129.1217041015625,
      "completions/mean_terminated_length": 643.6324462890625,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 14.503790087463557,
      "grad_norm": 0.15278203785419464,
      "learning_rate": 1e-06,
      "loss": -0.0466,
      "num_tokens": 905668137.0,
      "reward": 0.6729910969734192,
      "reward_std": 0.12497483193874359,
      "rewards/verify_math_reward/mean": 0.6729910969734192,
      "rewards/verify_math_reward/std": 0.46938255429267883,
      "step": 1552
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3005.0,
      "completions/mean_length": 1088.0123291015625,
      "completions/mean_terminated_length": 618.3781127929688,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 14.513119533527696,
      "grad_norm": 0.16624118387699127,
      "learning_rate": 1e-06,
      "loss": -0.0698,
      "num_tokens": 906236044.0,
      "reward": 0.6953125596046448,
      "reward_std": 0.1433524787425995,
      "rewards/verify_math_reward/mean": 0.6953125,
      "rewards/verify_math_reward/std": 0.4605320394039154,
      "step": 1553
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2782.0,
      "completions/mean_length": 1128.622802734375,
      "completions/mean_terminated_length": 638.561767578125,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 14.522448979591836,
      "grad_norm": 0.16740426421165466,
      "learning_rate": 1e-06,
      "loss": -0.0791,
      "num_tokens": 906817562.0,
      "reward": 0.6584821939468384,
      "reward_std": 0.16070660948753357,
      "rewards/verify_math_reward/mean": 0.6584821343421936,
      "rewards/verify_math_reward/std": 0.4744836091995239,
      "step": 1554
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3801.0,
      "completions/mean_length": 1035.53466796875,
      "completions/mean_terminated_length": 646.7207641601562,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 14.531778425655977,
      "grad_norm": 0.14488689601421356,
      "learning_rate": 1e-06,
      "loss": -0.049,
      "num_tokens": 907430233.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.09882382303476334,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.46555325388908386,
      "step": 1555
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1439732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3761.0,
      "completions/mean_length": 1152.173095703125,
      "completions/mean_terminated_length": 657.057373046875,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 14.541107871720117,
      "grad_norm": 0.1420653611421585,
      "learning_rate": 1e-06,
      "loss": -0.0542,
      "num_tokens": 908029540.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.1204291433095932,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.47807058691978455,
      "step": 1556
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0904017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2787.0,
      "completions/mean_length": 931.9989013671875,
      "completions/mean_terminated_length": 617.5398559570312,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 14.550437317784256,
      "grad_norm": 0.16833344101905823,
      "learning_rate": 1e-06,
      "loss": -0.0701,
      "num_tokens": 908619283.0,
      "reward": 0.7377232313156128,
      "reward_std": 0.15213686227798462,
      "rewards/verify_math_reward/mean": 0.7377232313156128,
      "rewards/verify_math_reward/std": 0.4401180148124695,
      "step": 1557
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0892857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4065.0,
      "completions/mean_length": 877.1685791015625,
      "completions/mean_terminated_length": 561.5968627929688,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 14.559766763848396,
      "grad_norm": 0.16206017136573792,
      "learning_rate": 1e-06,
      "loss": -0.0463,
      "num_tokens": 909156954.0,
      "reward": 0.7633928656578064,
      "reward_std": 0.11607100814580917,
      "rewards/verify_math_reward/mean": 0.7633928656578064,
      "rewards/verify_math_reward/std": 0.42523646354675293,
      "step": 1558
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3649.0,
      "completions/mean_length": 986.0670166015625,
      "completions/mean_terminated_length": 604.1453857421875,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 14.569096209912537,
      "grad_norm": 0.1452341228723526,
      "learning_rate": 1e-06,
      "loss": -0.0663,
      "num_tokens": 909729422.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.12110455334186554,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4637712836265564,
      "step": 1559
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1774553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3493.0,
      "completions/mean_length": 1254.4453125,
      "completions/mean_terminated_length": 641.4097900390625,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 14.578425655976677,
      "grad_norm": 0.18581481277942657,
      "learning_rate": 1e-06,
      "loss": -0.044,
      "num_tokens": 910290309.0,
      "reward": 0.6551339626312256,
      "reward_std": 0.11114581674337387,
      "rewards/verify_math_reward/mean": 0.6551339030265808,
      "rewards/verify_math_reward/std": 0.4755900502204895,
      "step": 1560
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0959821428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3994.0,
      "completions/mean_length": 894.2522583007812,
      "completions/mean_terminated_length": 554.3135986328125,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 14.587755102040816,
      "grad_norm": 0.1467188447713852,
      "learning_rate": 1e-06,
      "loss": -0.0578,
      "num_tokens": 910826623.0,
      "reward": 0.770089328289032,
      "reward_std": 0.10212958604097366,
      "rewards/verify_math_reward/mean": 0.7700892686843872,
      "rewards/verify_math_reward/std": 0.42101022601127625,
      "step": 1561
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3756.0,
      "completions/mean_length": 995.482177734375,
      "completions/mean_terminated_length": 627.7553100585938,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 14.597084548104956,
      "grad_norm": 0.15646781027317047,
      "learning_rate": 1e-06,
      "loss": -0.0431,
      "num_tokens": 911423431.0,
      "reward": 0.691964328289032,
      "reward_std": 0.13478271663188934,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 1562
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1160714285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3537.0,
      "completions/mean_length": 1005.8582763671875,
      "completions/mean_terminated_length": 600.0820922851562,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 14.606413994169095,
      "grad_norm": 0.1558239459991455,
      "learning_rate": 1e-06,
      "loss": -0.0935,
      "num_tokens": 911984016.0,
      "reward": 0.7031250596046448,
      "reward_std": 0.13711389899253845,
      "rewards/verify_math_reward/mean": 0.703125,
      "rewards/verify_math_reward/std": 0.4571361541748047,
      "step": 1563
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3643.0,
      "completions/mean_length": 1046.693115234375,
      "completions/mean_terminated_length": 637.5455932617188,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 14.615743440233237,
      "grad_norm": 0.16422036290168762,
      "learning_rate": 1e-06,
      "loss": -0.0585,
      "num_tokens": 912577133.0,
      "reward": 0.6897321939468384,
      "reward_std": 0.12749329209327698,
      "rewards/verify_math_reward/mean": 0.6897321343421936,
      "rewards/verify_math_reward/std": 0.4628615975379944,
      "step": 1564
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3837.0,
      "completions/mean_length": 955.872802734375,
      "completions/mean_terminated_length": 587.8279418945312,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 14.625072886297376,
      "grad_norm": 0.16404128074645996,
      "learning_rate": 1e-06,
      "loss": -0.0553,
      "num_tokens": 913137131.0,
      "reward": 0.7500000596046448,
      "reward_std": 0.1285124570131302,
      "rewards/verify_math_reward/mean": 0.75,
      "rewards/verify_math_reward/std": 0.43325456976890564,
      "step": 1565
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3736.0,
      "completions/mean_length": 1022.6160888671875,
      "completions/mean_terminated_length": 592.4987182617188,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 14.634402332361516,
      "grad_norm": 0.18624532222747803,
      "learning_rate": 1e-06,
      "loss": -0.0383,
      "num_tokens": 913687683.0,
      "reward": 0.7064732313156128,
      "reward_std": 0.11791320890188217,
      "rewards/verify_math_reward/mean": 0.7064732313156128,
      "rewards/verify_math_reward/std": 0.4556320011615753,
      "step": 1566
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3080.0,
      "completions/mean_length": 856.8516235351562,
      "completions/mean_terminated_length": 565.2493896484375,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 14.643731778425655,
      "grad_norm": 0.14966975152492523,
      "learning_rate": 1e-06,
      "loss": -0.0118,
      "num_tokens": 914248390.0,
      "reward": 0.7477678656578064,
      "reward_std": 0.09003441780805588,
      "rewards/verify_math_reward/mean": 0.7477678656578064,
      "rewards/verify_math_reward/std": 0.4345363676548004,
      "step": 1567
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1417410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3904.0,
      "completions/mean_length": 1132.591552734375,
      "completions/mean_terminated_length": 643.1859741210938,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 14.653061224489797,
      "grad_norm": 0.19849668443202972,
      "learning_rate": 1e-06,
      "loss": -0.0756,
      "num_tokens": 914825608.0,
      "reward": 0.707589328289032,
      "reward_std": 0.15149927139282227,
      "rewards/verify_math_reward/mean": 0.7075892686843872,
      "rewards/verify_math_reward/std": 0.45512402057647705,
      "step": 1568
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3862.0,
      "completions/mean_length": 1101.75341796875,
      "completions/mean_terminated_length": 669.6334228515625,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 14.662390670553936,
      "grad_norm": 0.15468859672546387,
      "learning_rate": 1e-06,
      "loss": -0.0713,
      "num_tokens": 915450907.0,
      "reward": 0.660714328289032,
      "reward_std": 0.13508442044258118,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 1569
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1138392857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3328.0,
      "completions/mean_length": 1000.5625610351562,
      "completions/mean_terminated_length": 602.9118041992188,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 14.671720116618076,
      "grad_norm": 0.1583516001701355,
      "learning_rate": 1e-06,
      "loss": -0.0331,
      "num_tokens": 916016699.0,
      "reward": 0.6852678656578064,
      "reward_std": 0.11791251599788666,
      "rewards/verify_math_reward/mean": 0.6852678656578064,
      "rewards/verify_math_reward/std": 0.4646684527397156,
      "step": 1570
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0926339285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3829.0,
      "completions/mean_length": 940.700927734375,
      "completions/mean_terminated_length": 618.5731811523438,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 14.681049562682215,
      "grad_norm": 0.15199647843837738,
      "learning_rate": 1e-06,
      "loss": -0.0362,
      "num_tokens": 916611887.0,
      "reward": 0.7243303656578064,
      "reward_std": 0.11580956727266312,
      "rewards/verify_math_reward/mean": 0.7243303656578064,
      "rewards/verify_math_reward/std": 0.4471006691455841,
      "step": 1571
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1294642857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3351.0,
      "completions/mean_length": 1022.9319458007812,
      "completions/mean_terminated_length": 565.9115600585938,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 14.690379008746355,
      "grad_norm": 0.1610073745250702,
      "learning_rate": 1e-06,
      "loss": -0.0395,
      "num_tokens": 917150794.0,
      "reward": 0.6863839626312256,
      "reward_std": 0.12512819468975067,
      "rewards/verify_math_reward/mean": 0.6863839030265808,
      "rewards/verify_math_reward/std": 0.4642214775085449,
      "step": 1572
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1350446428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3761.0,
      "completions/mean_length": 1157.2054443359375,
      "completions/mean_terminated_length": 698.3742065429688,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 14.699708454810496,
      "grad_norm": 0.19162149727344513,
      "learning_rate": 1e-06,
      "loss": -0.0787,
      "num_tokens": 917783274.0,
      "reward": 0.578125,
      "reward_std": 0.18994270265102386,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 1573
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1082589285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3203.0,
      "completions/mean_length": 959.8817138671875,
      "completions/mean_terminated_length": 579.1514282226562,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 14.709037900874636,
      "grad_norm": 0.16119384765625,
      "learning_rate": 1e-06,
      "loss": -0.0744,
      "num_tokens": 918333752.0,
      "reward": 0.723214328289032,
      "reward_std": 0.12069015204906464,
      "rewards/verify_math_reward/mean": 0.7232142686843872,
      "rewards/verify_math_reward/std": 0.44765952229499817,
      "step": 1574
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1060267857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3001.0,
      "completions/mean_length": 943.60498046875,
      "completions/mean_terminated_length": 569.725341796875,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 14.718367346938775,
      "grad_norm": 0.155434712767601,
      "learning_rate": 1e-06,
      "loss": -0.0396,
      "num_tokens": 918871678.0,
      "reward": 0.7343750596046448,
      "reward_std": 0.12125540524721146,
      "rewards/verify_math_reward/mean": 0.734375,
      "rewards/verify_math_reward/std": 0.44191211462020874,
      "step": 1575
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3191.0,
      "completions/mean_length": 1045.1507568359375,
      "completions/mean_terminated_length": 604.862060546875,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 14.727696793002915,
      "grad_norm": 0.17898200452327728,
      "learning_rate": 1e-06,
      "loss": -0.0573,
      "num_tokens": 919451973.0,
      "reward": 0.676339328289032,
      "reward_std": 0.15406078100204468,
      "rewards/verify_math_reward/mean": 0.6763392686843872,
      "rewards/verify_math_reward/std": 0.4681335985660553,
      "step": 1576
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3975.0,
      "completions/mean_length": 973.0402221679688,
      "completions/mean_terminated_length": 607.0075073242188,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 14.737026239067056,
      "grad_norm": 0.17064312100410461,
      "learning_rate": 1e-06,
      "loss": -0.0791,
      "num_tokens": 920038121.0,
      "reward": 0.7176339626312256,
      "reward_std": 0.13369613885879517,
      "rewards/verify_math_reward/mean": 0.7176339030265808,
      "rewards/verify_math_reward/std": 0.4504019320011139,
      "step": 1577
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0792410714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3654.0,
      "completions/mean_length": 840.5725708007812,
      "completions/mean_terminated_length": 560.4085083007812,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 14.746355685131196,
      "grad_norm": 0.18756458163261414,
      "learning_rate": 1e-06,
      "loss": -0.0492,
      "num_tokens": 920581010.0,
      "reward": 0.7500000596046448,
      "reward_std": 0.16217227280139923,
      "rewards/verify_math_reward/mean": 0.75,
      "rewards/verify_math_reward/std": 0.43325456976890564,
      "step": 1578
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0825892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3015.0,
      "completions/mean_length": 864.1495971679688,
      "completions/mean_terminated_length": 573.204345703125,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 14.755685131195335,
      "grad_norm": 0.1540251523256302,
      "learning_rate": 1e-06,
      "loss": -0.0542,
      "num_tokens": 921147272.0,
      "reward": 0.7611607313156128,
      "reward_std": 0.11565801501274109,
      "rewards/verify_math_reward/mean": 0.7611607313156128,
      "rewards/verify_math_reward/std": 0.4266124963760376,
      "step": 1579
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1004464285714286,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2541.0,
      "completions/mean_length": 919.21435546875,
      "completions/mean_terminated_length": 564.486328125,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 14.765014577259475,
      "grad_norm": 0.14839625358581543,
      "learning_rate": 1e-06,
      "loss": -0.06,
      "num_tokens": 921693040.0,
      "reward": 0.723214328289032,
      "reward_std": 0.11637409776449203,
      "rewards/verify_math_reward/mean": 0.7232142686843872,
      "rewards/verify_math_reward/std": 0.44765952229499817,
      "step": 1580
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3859.0,
      "completions/mean_length": 1147.1763916015625,
      "completions/mean_terminated_length": 682.3746948242188,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 14.774344023323614,
      "grad_norm": 0.18178090453147888,
      "learning_rate": 1e-06,
      "loss": -0.0747,
      "num_tokens": 922322734.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.15785479545593262,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.4876568913459778,
      "step": 1581
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0970982142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3057.0,
      "completions/mean_length": 933.2076416015625,
      "completions/mean_terminated_length": 593.080322265625,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 14.783673469387756,
      "grad_norm": 0.1612182855606079,
      "learning_rate": 1e-06,
      "loss": -0.0352,
      "num_tokens": 922894904.0,
      "reward": 0.652901828289032,
      "reward_std": 0.11829410493373871,
      "rewards/verify_math_reward/mean": 0.6529017686843872,
      "rewards/verify_math_reward/std": 0.47631317377090454,
      "step": 1582
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1149553571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2877.0,
      "completions/mean_length": 955.8984985351562,
      "completions/mean_terminated_length": 548.0416259765625,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 14.793002915451895,
      "grad_norm": 0.15704992413520813,
      "learning_rate": 1e-06,
      "loss": -0.0651,
      "num_tokens": 923414341.0,
      "reward": 0.7656250596046448,
      "reward_std": 0.11032027006149292,
      "rewards/verify_math_reward/mean": 0.765625,
      "rewards/verify_math_reward/std": 0.4238441288471222,
      "step": 1583
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1049107142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3532.0,
      "completions/mean_length": 976.3750610351562,
      "completions/mean_terminated_length": 610.7332153320312,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 14.802332361516035,
      "grad_norm": 0.15128548443317413,
      "learning_rate": 1e-06,
      "loss": -0.0721,
      "num_tokens": 923990133.0,
      "reward": 0.6718750596046448,
      "reward_std": 0.12974917888641357,
      "rewards/verify_math_reward/mean": 0.671875,
      "rewards/verify_math_reward/std": 0.46979284286499023,
      "step": 1584
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1272321428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3154.0,
      "completions/mean_length": 1109.7176513671875,
      "completions/mean_terminated_length": 674.3772583007812,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 14.811661807580174,
      "grad_norm": 0.15887951850891113,
      "learning_rate": 1e-06,
      "loss": -0.0843,
      "num_tokens": 924613488.0,
      "reward": 0.660714328289032,
      "reward_std": 0.14684438705444336,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 1585
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3648.0,
      "completions/mean_length": 1017.9699096679688,
      "completions/mean_terminated_length": 604.9683837890625,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 14.820991253644316,
      "grad_norm": 0.14667904376983643,
      "learning_rate": 1e-06,
      "loss": -0.0426,
      "num_tokens": 925184069.0,
      "reward": 0.691964328289032,
      "reward_std": 0.1028788611292839,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.4619392454624176,
      "step": 1586
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1361607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3848.0,
      "completions/mean_length": 1110.227783203125,
      "completions/mean_terminated_length": 639.6021118164062,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 14.830320699708455,
      "grad_norm": 0.15516255795955658,
      "learning_rate": 1e-06,
      "loss": -0.0392,
      "num_tokens": 925764297.0,
      "reward": 0.6640625,
      "reward_std": 0.134141206741333,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 1587
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0837053571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2970.0,
      "completions/mean_length": 851.1964721679688,
      "completions/mean_terminated_length": 554.777099609375,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 14.839650145772595,
      "grad_norm": 0.15170130133628845,
      "learning_rate": 1e-06,
      "loss": -0.0872,
      "num_tokens": 926312313.0,
      "reward": 0.7477678656578064,
      "reward_std": 0.13301284611225128,
      "rewards/verify_math_reward/mean": 0.7477678656578064,
      "rewards/verify_math_reward/std": 0.4345363676548004,
      "step": 1588
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.109375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3927.0,
      "completions/mean_length": 1040.1239013671875,
      "completions/mean_terminated_length": 664.8408203125,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 14.848979591836734,
      "grad_norm": 0.14962172508239746,
      "learning_rate": 1e-06,
      "loss": -0.0603,
      "num_tokens": 926931664.0,
      "reward": 0.6640625,
      "reward_std": 0.14266708493232727,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 1589
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1227678571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3939.0,
      "completions/mean_length": 1059.352783203125,
      "completions/mean_terminated_length": 634.3765869140625,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 14.858309037900874,
      "grad_norm": 0.16940048336982727,
      "learning_rate": 1e-06,
      "loss": -0.0373,
      "num_tokens": 927523068.0,
      "reward": 0.6640625,
      "reward_std": 0.1179899051785469,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 1590
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2574.0,
      "completions/mean_length": 929.1964721679688,
      "completions/mean_terminated_length": 566.8258666992188,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 14.867638483965015,
      "grad_norm": 0.17912112176418304,
      "learning_rate": 1e-06,
      "loss": -0.0608,
      "num_tokens": 928067012.0,
      "reward": 0.7176339626312256,
      "reward_std": 0.149361714720726,
      "rewards/verify_math_reward/mean": 0.7176339030265808,
      "rewards/verify_math_reward/std": 0.4504019320011139,
      "step": 1591
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0736607142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2960.0,
      "completions/mean_length": 809.8939819335938,
      "completions/mean_terminated_length": 548.5891723632812,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 14.876967930029155,
      "grad_norm": 0.15511579811573029,
      "learning_rate": 1e-06,
      "loss": -0.051,
      "num_tokens": 928610173.0,
      "reward": 0.785714328289032,
      "reward_std": 0.10867056250572205,
      "rewards/verify_math_reward/mean": 0.7857142686843872,
      "rewards/verify_math_reward/std": 0.4105550944805145,
      "step": 1592
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1305803571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3541.0,
      "completions/mean_length": 1074.6038818359375,
      "completions/mean_terminated_length": 620.8126220703125,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 14.886297376093294,
      "grad_norm": 0.2319992035627365,
      "learning_rate": 1e-06,
      "loss": -0.0489,
      "num_tokens": 929190018.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.13873010873794556,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.47060438990592957,
      "step": 1593
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1261160714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3720.0,
      "completions/mean_length": 1005.6038208007812,
      "completions/mean_terminated_length": 559.60791015625,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 14.895626822157434,
      "grad_norm": 0.12457224726676941,
      "learning_rate": 1e-06,
      "loss": -0.0491,
      "num_tokens": 929720471.0,
      "reward": 0.660714328289032,
      "reward_std": 0.06707222014665604,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4737313687801361,
      "step": 1594
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1183035714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4086.0,
      "completions/mean_length": 1016.7779541015625,
      "completions/mean_terminated_length": 603.616455078125,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 14.904956268221575,
      "grad_norm": 0.1526627540588379,
      "learning_rate": 1e-06,
      "loss": -0.0253,
      "num_tokens": 930287960.0,
      "reward": 0.684151828289032,
      "reward_std": 0.11501792818307877,
      "rewards/verify_math_reward/mean": 0.6841517686843872,
      "rewards/verify_math_reward/std": 0.4651124179363251,
      "step": 1595
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0915178571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3631.0,
      "completions/mean_length": 953.036865234375,
      "completions/mean_terminated_length": 636.423828125,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 14.914285714285715,
      "grad_norm": 0.15698200464248657,
      "learning_rate": 1e-06,
      "loss": -0.0768,
      "num_tokens": 930891705.0,
      "reward": 0.7131696939468384,
      "reward_std": 0.1341080218553543,
      "rewards/verify_math_reward/mean": 0.7131696343421936,
      "rewards/verify_math_reward/std": 0.4525342881679535,
      "step": 1596
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1026785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2859.0,
      "completions/mean_length": 901.86279296875,
      "completions/mean_terminated_length": 536.3644409179688,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 14.923615160349854,
      "grad_norm": 0.1555425226688385,
      "learning_rate": 1e-06,
      "loss": -0.0527,
      "num_tokens": 931404622.0,
      "reward": 0.762276828289032,
      "reward_std": 0.096761554479599,
      "rewards/verify_math_reward/mean": 0.7622767686843872,
      "rewards/verify_math_reward/std": 0.42592647671699524,
      "step": 1597
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1127232142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3426.0,
      "completions/mean_length": 1003.1194458007812,
      "completions/mean_terminated_length": 610.1874389648438,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 14.932944606413994,
      "grad_norm": 0.1556512713432312,
      "learning_rate": 1e-06,
      "loss": -0.0816,
      "num_tokens": 931973705.0,
      "reward": 0.7020089626312256,
      "reward_std": 0.1509779393672943,
      "rewards/verify_math_reward/mean": 0.7020089030265808,
      "rewards/verify_math_reward/std": 0.45763099193573,
      "step": 1598
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1205357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3186.0,
      "completions/mean_length": 1009.4576416015625,
      "completions/mean_terminated_length": 586.4288940429688,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 14.942274052478133,
      "grad_norm": 0.14617133140563965,
      "learning_rate": 1e-06,
      "loss": -0.0453,
      "num_tokens": 932528283.0,
      "reward": 0.699776828289032,
      "reward_std": 0.10595890879631042,
      "rewards/verify_math_reward/mean": 0.6997767686843872,
      "rewards/verify_math_reward/std": 0.4586109220981598,
      "step": 1599
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.1506696428571429,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3293.0,
      "completions/mean_length": 1177.529052734375,
      "completions/mean_terminated_length": 659.797607421875,
      "completions/min_length": 193.0,
      "completions/min_terminated_length": 193.0,
      "epoch": 14.951603498542275,
      "grad_norm": 0.1690528392791748,
      "learning_rate": 1e-06,
      "loss": -0.0656,
      "num_tokens": 933130789.0,
      "reward": 0.606026828289032,
      "reward_std": 0.14586800336837769,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 1600
    },
    {
      "epoch": 14.951603498542275,
      "step": 1600,
      "total_flos": 0.0,
      "train_loss": -0.0391429264701253,
      "train_runtime": 211229.3498,
      "train_samples_per_second": 6.787,
      "train_steps_per_second": 0.008
    }
  ],
  "logging_steps": 1,
  "max_steps": 1600,
  "num_input_tokens_seen": 933130789,
  "num_train_epochs": 15,
  "save_steps": 80,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}