{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 14.951881014873141,
  "eval_steps": 500,
  "global_step": 1600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1947.0,
      "completions/mean_length": 597.8717041015625,
      "completions/mean_terminated_length": 530.21728515625,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "epoch": 0.009332166812481774,
      "grad_norm": 0.1767578125,
      "learning_rate": 1e-06,
      "loss": -0.0217,
      "num_tokens": 552565.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.2720065116882324,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608329772949,
      "step": 1
    },
    {
      "clip_ratio/high_max": 0.0014833615286988788,
      "clip_ratio/high_mean": 0.00045727290410013666,
      "clip_ratio/low_mean": 0.000256149965196073,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007134228812901711,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3467.0,
      "completions/mean_length": 577.5647583007812,
      "completions/mean_terminated_length": 549.8605346679688,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 0.018664333624963548,
      "grad_norm": 0.1484375,
      "learning_rate": 1e-06,
      "loss": 0.0008,
      "num_tokens": 1128647.0,
      "reward": 0.4732142984867096,
      "reward_std": 0.2450285404920578,
      "rewards/verify_math_reward/mean": 0.4732142984867096,
      "rewards/verify_math_reward/std": 0.4995608925819397,
      "step": 2
    },
    {
      "clip_ratio/high_max": 0.001585116649948759,
      "clip_ratio/high_mean": 0.0005085156053610262,
      "clip_ratio/low_mean": 0.0002956197542971495,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000804135374892212,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2789.0,
      "completions/mean_length": 587.3873291015625,
      "completions/mean_terminated_length": 543.7774047851562,
      "completions/min_length": 42.0,
      "completions/min_terminated_length": 42.0,
      "epoch": 0.02799650043744532,
      "grad_norm": 0.1572265625,
      "learning_rate": 1e-06,
      "loss": -0.0058,
      "num_tokens": 1708434.0,
      "reward": 0.4988839626312256,
      "reward_std": 0.23973384499549866,
      "rewards/verify_math_reward/mean": 0.4988839328289032,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 3
    },
    {
      "clip_ratio/high_max": 0.0016634631665510824,
      "clip_ratio/high_mean": 0.0005582791625329264,
      "clip_ratio/low_mean": 0.000263425162643216,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008217043141485192,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2717.0,
      "completions/mean_length": 557.1004638671875,
      "completions/mean_terminated_length": 533.24267578125,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 0.037328667249927096,
      "grad_norm": 0.171875,
      "learning_rate": 1e-06,
      "loss": -0.0144,
      "num_tokens": 2262452.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.25761085748672485,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763264656067,
      "step": 4
    },
    {
      "clip_ratio/high_max": 0.001812106933357427,
      "clip_ratio/high_mean": 0.0006231880311133864,
      "clip_ratio/low_mean": 0.00030001021900716296,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009231982385244919,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2825.0,
      "completions/mean_length": 584.5267944335938,
      "completions/mean_terminated_length": 540.88134765625,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "epoch": 0.04666083406240887,
      "grad_norm": 0.171875,
      "learning_rate": 1e-06,
      "loss": -0.0199,
      "num_tokens": 2824700.0,
      "reward": 0.494419664144516,
      "reward_std": 0.27577054500579834,
      "rewards/verify_math_reward/mean": 0.4944196343421936,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 5
    },
    {
      "clip_ratio/high_max": 0.0014002122825331753,
      "clip_ratio/high_mean": 0.0004531991592102713,
      "clip_ratio/low_mean": 0.00042794430373760406,
      "clip_ratio/low_min": 1.1819212886621244e-05,
      "clip_ratio/region_mean": 0.0008811434686322173,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2857.0,
      "completions/mean_length": 638.2701416015625,
      "completions/mean_terminated_length": 559.3264770507812,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "epoch": 0.05599300087489064,
      "grad_norm": 0.15234375,
      "learning_rate": 1e-06,
      "loss": 0.0018,
      "num_tokens": 3405094.0,
      "reward": 0.494419664144516,
      "reward_std": 0.266407310962677,
      "rewards/verify_math_reward/mean": 0.4944196343421936,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 6
    },
    {
      "clip_ratio/high_max": 0.001297875107411528,
      "clip_ratio/high_mean": 0.00038535677481377206,
      "clip_ratio/low_mean": 0.0003542163467500359,
      "clip_ratio/low_min": 1.504573901911499e-05,
      "clip_ratio/region_mean": 0.000739573106784519,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3607.0,
      "completions/mean_length": 546.1830444335938,
      "completions/mean_terminated_length": 514.2026977539062,
      "completions/min_length": 46.0,
      "completions/min_terminated_length": 46.0,
      "epoch": 0.06532516768737241,
      "grad_norm": 0.1748046875,
      "learning_rate": 1e-06,
      "loss": 0.0048,
      "num_tokens": 3960162.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.2519356906414032,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 7
    },
    {
      "clip_ratio/high_max": 0.0013917427995693288,
      "clip_ratio/high_mean": 0.00041493226422062435,
      "clip_ratio/low_mean": 0.00031620567665413546,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000731137938601023,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2882.0,
      "completions/mean_length": 617.8616333007812,
      "completions/mean_terminated_length": 582.5704345703125,
      "completions/min_length": 64.0,
      "completions/min_terminated_length": 64.0,
      "epoch": 0.07465733449985419,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0006,
      "num_tokens": 4551294.0,
      "reward": 0.4888392984867096,
      "reward_std": 0.2438676804304123,
      "rewards/verify_math_reward/mean": 0.4888392984867096,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 8
    },
    {
      "clip_ratio/high_max": 0.0018802964186761528,
      "clip_ratio/high_mean": 0.0005461828970965144,
      "clip_ratio/low_mean": 0.00031180268240404985,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008579855916650558,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2460.0,
      "completions/mean_length": 632.1920166015625,
      "completions/mean_terminated_length": 557.1493530273438,
      "completions/min_length": 54.0,
      "completions/min_terminated_length": 54.0,
      "epoch": 0.08398950131233596,
      "grad_norm": 0.166015625,
      "learning_rate": 1e-06,
      "loss": -0.0022,
      "num_tokens": 5128890.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.2608848214149475,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 9
    },
    {
      "clip_ratio/high_max": 0.0015824930942471838,
      "clip_ratio/high_mean": 0.0004886576807621168,
      "clip_ratio/low_mean": 0.00035787165802503296,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008465293458357337,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3914.0,
      "completions/mean_length": 633.857177734375,
      "completions/mean_terminated_length": 554.812744140625,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 0.09332166812481774,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": -0.0117,
      "num_tokens": 5710146.0,
      "reward": 0.4720982313156128,
      "reward_std": 0.24513548612594604,
      "rewards/verify_math_reward/mean": 0.4720982015132904,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 10
    },
    {
      "clip_ratio/high_max": 0.0017227220623681205,
      "clip_ratio/high_mean": 0.0004794969561316975,
      "clip_ratio/low_mean": 0.0002942044511655695,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007737014029771672,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3806.0,
      "completions/mean_length": 633.8248291015625,
      "completions/mean_terminated_length": 574.87744140625,
      "completions/min_length": 93.0,
      "completions/min_terminated_length": 93.0,
      "epoch": 0.1026538349372995,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": 0.0013,
      "num_tokens": 6300957.0,
      "reward": 0.4486607313156128,
      "reward_std": 0.24141353368759155,
      "rewards/verify_math_reward/mean": 0.4486607015132904,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 11
    },
    {
      "clip_ratio/high_max": 0.0016666346273268573,
      "clip_ratio/high_mean": 0.00045902808733444544,
      "clip_ratio/low_mean": 0.0003052549761264345,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007642830696568126,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3757.0,
      "completions/mean_length": 589.5580444335938,
      "completions/mean_terminated_length": 537.934326171875,
      "completions/min_length": 17.0,
      "completions/min_terminated_length": 17.0,
      "epoch": 0.11198600174978128,
      "grad_norm": 0.1689453125,
      "learning_rate": 1e-06,
      "loss": -0.0052,
      "num_tokens": 6878433.0,
      "reward": 0.486607164144516,
      "reward_std": 0.23912441730499268,
      "rewards/verify_math_reward/mean": 0.4866071343421936,
      "rewards/verify_math_reward/std": 0.5000997185707092,
      "step": 12
    },
    {
      "clip_ratio/high_max": 0.0014551977310475195,
      "clip_ratio/high_mean": 0.0004775686838911497,
      "clip_ratio/low_mean": 0.0002972270609689076,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007747957361061708,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3355.0,
      "completions/mean_length": 634.234375,
      "completions/mean_terminated_length": 591.206787109375,
      "completions/min_length": 56.0,
      "completions/min_terminated_length": 56.0,
      "epoch": 0.12131816856226305,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0063,
      "num_tokens": 7489643.0,
      "reward": 0.5256696939468384,
      "reward_std": 0.24795658886432648,
      "rewards/verify_math_reward/mean": 0.5256696343421936,
      "rewards/verify_math_reward/std": 0.4996195137500763,
      "step": 13
    },
    {
      "clip_ratio/high_max": 0.00174857863021316,
      "clip_ratio/high_mean": 0.000491976448074638,
      "clip_ratio/low_mean": 0.0003782003570904635,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008701768101673224,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2712.0,
      "completions/mean_length": 585.6015625,
      "completions/mean_terminated_length": 521.776123046875,
      "completions/min_length": 62.0,
      "completions/min_terminated_length": 62.0,
      "epoch": 0.13065033537474482,
      "grad_norm": 0.1513671875,
      "learning_rate": 1e-06,
      "loss": -0.0063,
      "num_tokens": 8044918.0,
      "reward": 0.4832589626312256,
      "reward_std": 0.2616012990474701,
      "rewards/verify_math_reward/mean": 0.4832589328289032,
      "rewards/verify_math_reward/std": 0.4999987483024597,
      "step": 14
    },
    {
      "clip_ratio/high_max": 0.0016589798287895974,
      "clip_ratio/high_mean": 0.0005142171869465528,
      "clip_ratio/low_mean": 0.0002859190755089003,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008001362625691399,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3622.0,
      "completions/mean_length": 623.09375,
      "completions/mean_terminated_length": 579.9276733398438,
      "completions/min_length": 75.0,
      "completions/min_terminated_length": 75.0,
      "epoch": 0.1399825021872266,
      "grad_norm": 0.1552734375,
      "learning_rate": 1e-06,
      "loss": -0.0073,
      "num_tokens": 8651618.0,
      "reward": 0.5390625,
      "reward_std": 0.2433338314294815,
      "rewards/verify_math_reward/mean": 0.5390625,
      "rewards/verify_math_reward/std": 0.4987502098083496,
      "step": 15
    },
    {
      "clip_ratio/high_max": 0.0013559699900724809,
      "clip_ratio/high_mean": 0.000439762489804707,
      "clip_ratio/low_mean": 0.0003798546824782534,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008196171775125549,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4065.0,
      "completions/mean_length": 607.515625,
      "completions/mean_terminated_length": 548.120361328125,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 0.14931466899970838,
      "grad_norm": 0.1494140625,
      "learning_rate": 1e-06,
      "loss": -0.0049,
      "num_tokens": 9225976.0,
      "reward": 0.527901828289032,
      "reward_std": 0.23308269679546356,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 16
    },
    {
      "clip_ratio/high_max": 0.0014293352942331694,
      "clip_ratio/high_mean": 0.00041319123761240917,
      "clip_ratio/low_mean": 0.00031763293384301505,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007308241583814379,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3419.0,
      "completions/mean_length": 598.8002319335938,
      "completions/mean_terminated_length": 559.3284301757812,
      "completions/min_length": 71.0,
      "completions/min_terminated_length": 71.0,
      "epoch": 0.15864683581219013,
      "grad_norm": 0.15625,
      "learning_rate": 1e-06,
      "loss": -0.004,
      "num_tokens": 9807165.0,
      "reward": 0.5256696939468384,
      "reward_std": 0.27177828550338745,
      "rewards/verify_math_reward/mean": 0.5256696343421936,
      "rewards/verify_math_reward/std": 0.4996195137500763,
      "step": 17
    },
    {
      "clip_ratio/high_max": 0.0014759054902242497,
      "clip_ratio/high_mean": 0.00043099112144773244,
      "clip_ratio/low_mean": 0.00032991902389767347,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007609101417074271,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2924.0,
      "completions/mean_length": 619.7288208007812,
      "completions/mean_terminated_length": 568.5492553710938,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 0.1679790026246719,
      "grad_norm": 0.1552734375,
      "learning_rate": 1e-06,
      "loss": -0.0018,
      "num_tokens": 10390426.0,
      "reward": 0.4966517984867096,
      "reward_std": 0.2521638870239258,
      "rewards/verify_math_reward/mean": 0.4966517984867096,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 18
    },
    {
      "clip_ratio/high_max": 0.0020546681134874234,
      "clip_ratio/high_mean": 0.0006518496816170227,
      "clip_ratio/low_mean": 0.00034170514368270233,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009935548205248779,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3812.0,
      "completions/mean_length": 602.8873291015625,
      "completions/mean_terminated_length": 555.469482421875,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 0.1773111694371537,
      "grad_norm": 0.16015625,
      "learning_rate": 1e-06,
      "loss": -0.0093,
      "num_tokens": 10975917.0,
      "reward": 0.5625,
      "reward_std": 0.3053692877292633,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 19
    },
    {
      "clip_ratio/high_max": 0.0016187741694011493,
      "clip_ratio/high_mean": 0.0005318163211995852,
      "clip_ratio/low_mean": 0.000337265061943981,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008690813920111395,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4018.0,
      "completions/mean_length": 548.1707763671875,
      "completions/mean_terminated_length": 520.235107421875,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 0.18664333624963547,
      "grad_norm": 0.1533203125,
      "learning_rate": 1e-06,
      "loss": -0.0106,
      "num_tokens": 11529574.0,
      "reward": 0.574776828289032,
      "reward_std": 0.2594951391220093,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 20
    },
    {
      "clip_ratio/high_max": 0.0015502021578868153,
      "clip_ratio/high_mean": 0.0004626288832696446,
      "clip_ratio/low_mean": 0.00028075454758891283,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007433834298353759,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2681.0,
      "completions/mean_length": 565.4107666015625,
      "completions/mean_terminated_length": 525.5620727539062,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 0.19597550306211722,
      "grad_norm": 0.1640625,
      "learning_rate": 1e-06,
      "loss": -0.0105,
      "num_tokens": 12077414.0,
      "reward": 0.5546875,
      "reward_std": 0.26329195499420166,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 21
    },
    {
      "clip_ratio/high_max": 0.00131268548193475,
      "clip_ratio/high_mean": 0.0003837667984498694,
      "clip_ratio/low_mean": 0.00024391151930558408,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006276783155954035,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2551.0,
      "completions/mean_length": 624.7924194335938,
      "completions/mean_terminated_length": 593.520263671875,
      "completions/min_length": 50.0,
      "completions/min_terminated_length": 50.0,
      "epoch": 0.205307669874599,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0158,
      "num_tokens": 12699892.0,
      "reward": 0.4832589626312256,
      "reward_std": 0.2300340235233307,
      "rewards/verify_math_reward/mean": 0.4832589328289032,
      "rewards/verify_math_reward/std": 0.4999987483024597,
      "step": 22
    },
    {
      "clip_ratio/high_max": 0.0019214958756492706,
      "clip_ratio/high_mean": 0.0005548375861508248,
      "clip_ratio/low_mean": 0.00021802542983095918,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007728630353085464,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3787.0,
      "completions/mean_length": 588.3058471679688,
      "completions/mean_terminated_length": 536.6636352539062,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 0.21463983668708078,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0106,
      "num_tokens": 13258102.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.21225734055042267,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161845445632935,
      "step": 23
    },
    {
      "clip_ratio/high_max": 0.0016732044668970047,
      "clip_ratio/high_mean": 0.000507047609971778,
      "clip_ratio/low_mean": 0.00031541719931738044,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008224648172472371,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2814.0,
      "completions/mean_length": 639.9029541015625,
      "completions/mean_terminated_length": 585.0442504882812,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 0.22397200349956256,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": -0.0117,
      "num_tokens": 13858407.0,
      "reward": 0.4765625298023224,
      "reward_std": 0.27910587191581726,
      "rewards/verify_math_reward/mean": 0.4765625,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 24
    },
    {
      "clip_ratio/high_max": 0.0012036838897984126,
      "clip_ratio/high_mean": 0.00035329613638168667,
      "clip_ratio/low_mean": 0.0002732304989194745,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006265266288210114,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4047.0,
      "completions/mean_length": 665.4866333007812,
      "completions/mean_terminated_length": 591.1653442382812,
      "completions/min_length": 25.0,
      "completions/min_terminated_length": 25.0,
      "epoch": 0.23330417031204434,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0019,
      "num_tokens": 14462091.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.22398342192173004,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.5000997185707092,
      "step": 25
    },
    {
      "clip_ratio/high_max": 0.0015261218959494727,
      "clip_ratio/high_mean": 0.00045390168679659837,
      "clip_ratio/low_mean": 0.0003453062097378279,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000799207899490284,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2476.0,
      "completions/mean_length": 638.7600708007812,
      "completions/mean_terminated_length": 563.8597412109375,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.2426363371245261,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": 0.0026,
      "num_tokens": 15044068.0,
      "reward": 0.4720982313156128,
      "reward_std": 0.25757694244384766,
      "rewards/verify_math_reward/mean": 0.4720982015132904,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 26
    },
    {
      "clip_ratio/high_max": 0.0017356351945636561,
      "clip_ratio/high_mean": 0.0005630187088172534,
      "clip_ratio/low_mean": 0.0004012571356497574,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009642758450354449,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2087.0,
      "completions/mean_length": 596.5614013671875,
      "completions/mean_terminated_length": 541.0147705078125,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 0.25196850393700787,
      "grad_norm": 0.158203125,
      "learning_rate": 1e-06,
      "loss": -0.0004,
      "num_tokens": 15609363.0,
      "reward": 0.527901828289032,
      "reward_std": 0.28294095396995544,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 27
    },
    {
      "clip_ratio/high_max": 0.001649338155402802,
      "clip_ratio/high_mean": 0.0005405367069215572,
      "clip_ratio/low_mean": 0.0003426419023071503,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008831786117298179,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3773.0,
      "completions/mean_length": 583.0100708007812,
      "completions/mean_terminated_length": 551.3615112304688,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 0.26130067074948965,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.0095,
      "num_tokens": 16188964.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.2360539585351944,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 28
    },
    {
      "clip_ratio/high_max": 0.0016810440192784881,
      "clip_ratio/high_mean": 0.0005199292579618486,
      "clip_ratio/low_mean": 0.0003285311371428179,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008484603981742112,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4041.0,
      "completions/mean_length": 637.614990234375,
      "completions/mean_terminated_length": 574.7352294921875,
      "completions/min_length": 78.0,
      "completions/min_terminated_length": 78.0,
      "epoch": 0.27063283756197143,
      "grad_norm": 0.1552734375,
      "learning_rate": 1e-06,
      "loss": -0.0086,
      "num_tokens": 16783715.0,
      "reward": 0.4776785969734192,
      "reward_std": 0.257693886756897,
      "rewards/verify_math_reward/mean": 0.4776785671710968,
      "rewards/verify_math_reward/std": 0.4997805058956146,
      "step": 29
    },
    {
      "clip_ratio/high_max": 0.0013078466845399817,
      "clip_ratio/high_mean": 0.0003965248021131629,
      "clip_ratio/low_mean": 0.000290919498979747,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006874442929074576,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2668.0,
      "completions/mean_length": 574.9520263671875,
      "completions/mean_terminated_length": 527.155029296875,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 0.2799650043744532,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0057,
      "num_tokens": 17345048.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.21191851794719696,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 30
    },
    {
      "clip_ratio/high_max": 0.0014755538031749893,
      "clip_ratio/high_mean": 0.0004436471647295548,
      "clip_ratio/low_mean": 0.0003437455734456307,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007873927415857906,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2759.0,
      "completions/mean_length": 659.2879638671875,
      "completions/mean_terminated_length": 604.7369995117188,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 0.289297171186935,
      "grad_norm": 0.11376953125,
      "learning_rate": 1e-06,
      "loss": 0.0033,
      "num_tokens": 17963306.0,
      "reward": 0.5145089626312256,
      "reward_std": 0.22800594568252563,
      "rewards/verify_math_reward/mean": 0.5145089030265808,
      "rewards/verify_math_reward/std": 0.5000685453414917,
      "step": 31
    },
    {
      "clip_ratio/high_max": 0.0016754547441450995,
      "clip_ratio/high_mean": 0.0004678851930748351,
      "clip_ratio/low_mean": 0.0003820526953859371,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008499379000568297,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3801.0,
      "completions/mean_length": 597.9642944335938,
      "completions/mean_terminated_length": 550.4796752929688,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 0.29862933799941677,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": -0.0026,
      "num_tokens": 18540770.0,
      "reward": 0.5055803656578064,
      "reward_std": 0.2404039204120636,
      "rewards/verify_math_reward/mean": 0.5055803656578064,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 32
    },
    {
      "clip_ratio/high_max": 0.0015293593560272711,
      "clip_ratio/high_mean": 0.0004095853012131556,
      "clip_ratio/low_mean": 0.0003093034737275957,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007188887730080751,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3211.0,
      "completions/mean_length": 540.5870971679688,
      "completions/mean_terminated_length": 508.5563049316406,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 0.3079615048118985,
      "grad_norm": 0.1474609375,
      "learning_rate": 1e-06,
      "loss": -0.0042,
      "num_tokens": 19078904.0,
      "reward": 0.4966517984867096,
      "reward_std": 0.2256055474281311,
      "rewards/verify_math_reward/mean": 0.4966517984867096,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 33
    },
    {
      "clip_ratio/high_max": 0.0015010556344350334,
      "clip_ratio/high_mean": 0.0004454719312434463,
      "clip_ratio/low_mean": 0.00043172464529561694,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008771965631240164,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3720.0,
      "completions/mean_length": 615.6942138671875,
      "completions/mean_terminated_length": 560.4512329101562,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 0.31729367162438027,
      "grad_norm": 0.1494140625,
      "learning_rate": 1e-06,
      "loss": -0.0145,
      "num_tokens": 19656614.0,
      "reward": 0.53125,
      "reward_std": 0.2604729235172272,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 34
    },
    {
      "clip_ratio/high_max": 0.00146182533717365,
      "clip_ratio/high_mean": 0.0004877040967130597,
      "clip_ratio/low_mean": 0.0003603455347729323,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008480496271658922,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3128.0,
      "completions/mean_length": 622.2467041015625,
      "completions/mean_terminated_length": 563.1021728515625,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 0.32662583843686205,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 20236363.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.24145811796188354,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 35
    },
    {
      "clip_ratio/high_max": 0.0013757151491518016,
      "clip_ratio/high_mean": 0.0004118498437719609,
      "clip_ratio/low_mean": 0.0003148514662143498,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007267013115779264,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3910.0,
      "completions/mean_length": 653.3002319335938,
      "completions/mean_terminated_length": 590.7056884765625,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 0.3359580052493438,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0071,
      "num_tokens": 20849008.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.2310871183872223,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 36
    },
    {
      "clip_ratio/high_max": 0.0014319893280116958,
      "clip_ratio/high_mean": 0.00045862419722197956,
      "clip_ratio/low_mean": 0.000365841522352639,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008244657310569892,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3399.0,
      "completions/mean_length": 592.1953125,
      "completions/mean_terminated_length": 560.6295166015625,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 0.3452901720618256,
      "grad_norm": 0.1787109375,
      "learning_rate": 1e-06,
      "loss": 0.0,
      "num_tokens": 21423575.0,
      "reward": 0.4955357313156128,
      "reward_std": 0.2597554326057434,
      "rewards/verify_math_reward/mean": 0.4955357015132904,
      "rewards/verify_math_reward/std": 0.500259280204773,
      "step": 37
    },
    {
      "clip_ratio/high_max": 0.0016519977225470939,
      "clip_ratio/high_mean": 0.0005154066718660033,
      "clip_ratio/low_mean": 0.00026897135535364214,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007843780281291401,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3462.0,
      "completions/mean_length": 644.5502319335938,
      "completions/mean_terminated_length": 561.7153930664062,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 0.3546223388743074,
      "grad_norm": 0.171875,
      "learning_rate": 1e-06,
      "loss": -0.0154,
      "num_tokens": 22004412.0,
      "reward": 0.5145089626312256,
      "reward_std": 0.2486048936843872,
      "rewards/verify_math_reward/mean": 0.5145089030265808,
      "rewards/verify_math_reward/std": 0.5000685453414917,
      "step": 38
    },
    {
      "clip_ratio/high_max": 0.0019011590975424042,
      "clip_ratio/high_mean": 0.0005794297785541858,
      "clip_ratio/low_mean": 0.0003370601472170165,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000916489914743579,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3493.0,
      "completions/mean_length": 596.6417846679688,
      "completions/mean_terminated_length": 537.0613403320312,
      "completions/min_length": 58.0,
      "completions/min_terminated_length": 58.0,
      "epoch": 0.36395450568678916,
      "grad_norm": 0.154296875,
      "learning_rate": 1e-06,
      "loss": 0.0044,
      "num_tokens": 22573427.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.2544548213481903,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 39
    },
    {
      "clip_ratio/high_max": 0.0013376170936680865,
      "clip_ratio/high_mean": 0.00036210402311098733,
      "clip_ratio/low_mean": 0.00029379942054674757,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006559034321753643,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2798.0,
      "completions/mean_length": 575.0692138671875,
      "completions/mean_terminated_length": 539.3438110351562,
      "completions/min_length": 26.0,
      "completions/min_terminated_length": 26.0,
      "epoch": 0.37328667249927094,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.0051,
      "num_tokens": 23154177.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.20117954909801483,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 40
    },
    {
      "clip_ratio/high_max": 0.0015395624159282306,
      "clip_ratio/high_mean": 0.0005284612877858308,
      "clip_ratio/low_mean": 0.0003896172624990868,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009180785582429962,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3805.0,
      "completions/mean_length": 670.1361694335938,
      "completions/mean_terminated_length": 595.9155883789062,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 0.3826188393117527,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0013,
      "num_tokens": 23778579.0,
      "reward": 0.4654017984867096,
      "reward_std": 0.27790629863739014,
      "rewards/verify_math_reward/mean": 0.4654017984867096,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 41
    },
    {
      "clip_ratio/high_max": 0.0011948465162276989,
      "clip_ratio/high_mean": 0.0003530940239215852,
      "clip_ratio/low_mean": 0.0002722036805380412,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006252977007079608,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3749.0,
      "completions/mean_length": 594.966552734375,
      "completions/mean_terminated_length": 551.4508666992188,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "epoch": 0.39195100612423445,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0018,
      "num_tokens": 24357813.0,
      "reward": 0.5100446939468384,
      "reward_std": 0.201626718044281,
      "rewards/verify_math_reward/mean": 0.5100446343421936,
      "rewards/verify_math_reward/std": 0.5001782774925232,
      "step": 42
    },
    {
      "clip_ratio/high_max": 0.0017167266614706023,
      "clip_ratio/high_mean": 0.0005214383163547609,
      "clip_ratio/low_mean": 0.00025293392923231295,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000774372233536269,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3908.0,
      "completions/mean_length": 591.9486694335938,
      "completions/mean_terminated_length": 544.3823852539062,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "epoch": 0.4012831729367162,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": -0.0036,
      "num_tokens": 24925183.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.25145599246025085,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763562679291,
      "step": 43
    },
    {
      "clip_ratio/high_max": 0.0015150593953876523,
      "clip_ratio/high_mean": 0.00045249088111631863,
      "clip_ratio/low_mean": 0.0002036661325064415,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006561570080521051,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3314.0,
      "completions/mean_length": 561.5357666015625,
      "completions/mean_terminated_length": 505.43310546875,
      "completions/min_length": 85.0,
      "completions/min_terminated_length": 85.0,
      "epoch": 0.410615339749198,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": -0.0011,
      "num_tokens": 25457167.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.22834154963493347,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 44
    },
    {
      "clip_ratio/high_max": 0.0018521121382946149,
      "clip_ratio/high_mean": 0.000525584012393665,
      "clip_ratio/low_mean": 0.00026987602973349567,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007954600423545344,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2604.0,
      "completions/mean_length": 626.5658569335938,
      "completions/mean_terminated_length": 575.4869384765625,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 0.4199475065616798,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0236,
      "num_tokens": 26058714.0,
      "reward": 0.5379464626312256,
      "reward_std": 0.24356064200401306,
      "rewards/verify_math_reward/mean": 0.5379464030265808,
      "rewards/verify_math_reward/std": 0.4988364577293396,
      "step": 45
    },
    {
      "clip_ratio/high_max": 0.0013879348462069174,
      "clip_ratio/high_mean": 0.0004509769371452421,
      "clip_ratio/low_mean": 0.0003597295803956513,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008107065295916982,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3997.0,
      "completions/mean_length": 656.5033569335938,
      "completions/mean_terminated_length": 617.682861328125,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 0.42927967337416156,
      "grad_norm": 0.11865234375,
      "learning_rate": 1e-06,
      "loss": 0.0052,
      "num_tokens": 26694429.0,
      "reward": 0.5379464626312256,
      "reward_std": 0.2376294881105423,
      "rewards/verify_math_reward/mean": 0.5379464030265808,
      "rewards/verify_math_reward/std": 0.4988364577293396,
      "step": 46
    },
    {
      "clip_ratio/high_max": 0.0016000384803191992,
      "clip_ratio/high_mean": 0.0004762422627209162,
      "clip_ratio/low_mean": 0.000366352751825616,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008425950154560269,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3916.0,
      "completions/mean_length": 634.5982666015625,
      "completions/mean_terminated_length": 583.6375732421875,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 0.43861184018664334,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0146,
      "num_tokens": 27294677.0,
      "reward": 0.5011160969734192,
      "reward_std": 0.2740648686885834,
      "rewards/verify_math_reward/mean": 0.5011160969734192,
      "rewards/verify_math_reward/std": 0.5002780556678772,
      "step": 47
    },
    {
      "clip_ratio/high_max": 0.0015195509613477043,
      "clip_ratio/high_mean": 0.0004423020870945038,
      "clip_ratio/low_mean": 0.00026836878760150285,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000710670865373686,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2887.0,
      "completions/mean_length": 602.3683471679688,
      "completions/mean_terminated_length": 554.9434814453125,
      "completions/min_length": 12.0,
      "completions/min_terminated_length": 12.0,
      "epoch": 0.4479440069991251,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0031,
      "num_tokens": 27879895.0,
      "reward": 0.4799107313156128,
      "reward_std": 0.21394869685173035,
      "rewards/verify_math_reward/mean": 0.4799107015132904,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 48
    },
    {
      "clip_ratio/high_max": 0.0013185698808229063,
      "clip_ratio/high_mean": 0.0004206940443509666,
      "clip_ratio/low_mean": 0.0003416846567461107,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007623787046213693,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3555.0,
      "completions/mean_length": 584.9074096679688,
      "completions/mean_terminated_length": 557.260986328125,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 0.4572761738116069,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0056,
      "num_tokens": 28458316.0,
      "reward": 0.4776785969734192,
      "reward_std": 0.2448740452528,
      "rewards/verify_math_reward/mean": 0.4776785671710968,
      "rewards/verify_math_reward/std": 0.4997805058956146,
      "step": 49
    },
    {
      "clip_ratio/high_max": 0.0017849221330834553,
      "clip_ratio/high_mean": 0.0005301667756612005,
      "clip_ratio/low_mean": 0.00026941004330183205,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007995768182809115,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2506.0,
      "completions/mean_length": 627.6607666015625,
      "completions/mean_terminated_length": 540.35693359375,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 0.4666083406240887,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0028,
      "num_tokens": 29033124.0,
      "reward": 0.4933035969734192,
      "reward_std": 0.23329952359199524,
      "rewards/verify_math_reward/mean": 0.4933035671710968,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 50
    },
    {
      "clip_ratio/high_max": 0.0016930396777752321,
      "clip_ratio/high_mean": 0.0004935184060741449,
      "clip_ratio/low_mean": 0.0002914745394946294,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007849929406802403,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3520.0,
      "completions/mean_length": 637.3013916015625,
      "completions/mean_terminated_length": 558.3355712890625,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 0.4759405074365704,
      "grad_norm": 0.1591796875,
      "learning_rate": 1e-06,
      "loss": 0.0001,
      "num_tokens": 29603434.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.2575739920139313,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 51
    },
    {
      "clip_ratio/high_max": 0.0015872932563070208,
      "clip_ratio/high_mean": 0.0005216651053387977,
      "clip_ratio/low_mean": 0.00039137663725341554,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009130417402047897,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4001.0,
      "completions/mean_length": 596.5033569335938,
      "completions/mean_terminated_length": 544.9818725585938,
      "completions/min_length": 68.0,
      "completions/min_terminated_length": 68.0,
      "epoch": 0.4852726742490522,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0073,
      "num_tokens": 30164341.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.24558943510055542,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 52
    },
    {
      "clip_ratio/high_max": 0.0014443692316490342,
      "clip_ratio/high_mean": 0.00046944564087425533,
      "clip_ratio/low_mean": 0.00031746199169901956,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007869076530369057,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2552.0,
      "completions/mean_length": 574.0580444335938,
      "completions/mean_terminated_length": 538.3223876953125,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 0.49460484106153396,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0067,
      "num_tokens": 30727585.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.25388845801353455,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 53
    },
    {
      "clip_ratio/high_max": 0.0015425280453200685,
      "clip_ratio/high_mean": 0.00047079653768378193,
      "clip_ratio/low_mean": 0.0003115816055014875,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007823781465958746,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3843.0,
      "completions/mean_length": 523.1707763671875,
      "completions/mean_terminated_length": 486.9187927246094,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 0.5039370078740157,
      "grad_norm": 0.1494140625,
      "learning_rate": 1e-06,
      "loss": -0.0093,
      "num_tokens": 31249322.0,
      "reward": 0.5379464626312256,
      "reward_std": 0.245513454079628,
      "rewards/verify_math_reward/mean": 0.5379464030265808,
      "rewards/verify_math_reward/std": 0.4988364279270172,
      "step": 54
    },
    {
      "clip_ratio/high_max": 0.0019663496750581544,
      "clip_ratio/high_mean": 0.0007126980458451726,
      "clip_ratio/low_mean": 0.0002605135380235879,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009732115763654292,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2255.0,
      "completions/mean_length": 522.3359375,
      "completions/mean_terminated_length": 498.24383544921875,
      "completions/min_length": 6.0,
      "completions/min_terminated_length": 6.0,
      "epoch": 0.5132691746864976,
      "grad_norm": 0.1669921875,
      "learning_rate": 1e-06,
      "loss": 0.001,
      "num_tokens": 31787151.0,
      "reward": 0.566964328289032,
      "reward_std": 0.25960850715637207,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 55
    },
    {
      "clip_ratio/high_max": 0.001540494351502275,
      "clip_ratio/high_mean": 0.00041581053733352746,
      "clip_ratio/low_mean": 0.000380868444835869,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007966789917190908,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2734.0,
      "completions/mean_length": 564.703125,
      "completions/mean_terminated_length": 540.8966064453125,
      "completions/min_length": 70.0,
      "completions/min_terminated_length": 70.0,
      "epoch": 0.5226013414989793,
      "grad_norm": 0.1435546875,
      "learning_rate": 1e-06,
      "loss": 0.0157,
      "num_tokens": 32359957.0,
      "reward": 0.4910714626312256,
      "reward_std": 0.21601885557174683,
      "rewards/verify_math_reward/mean": 0.4910714328289032,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 56
    },
    {
      "clip_ratio/high_max": 0.0012436833549145376,
      "clip_ratio/high_mean": 0.00035380065605750133,
      "clip_ratio/low_mean": 0.0002859475207515061,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006397481815838546,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2337.0,
      "completions/mean_length": 563.638427734375,
      "completions/mean_terminated_length": 519.7333374023438,
      "completions/min_length": 81.0,
      "completions/min_terminated_length": 81.0,
      "epoch": 0.531933508311461,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": 0.0005,
      "num_tokens": 32906673.0,
      "reward": 0.543526828289032,
      "reward_std": 0.19925953447818756,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 57
    },
    {
      "clip_ratio/high_max": 0.0018532251688156975,
      "clip_ratio/high_mean": 0.0005421158548415406,
      "clip_ratio/low_mean": 0.0003463318034846452,
      "clip_ratio/low_min": 1.0109996765095275e-05,
      "clip_ratio/region_mean": 0.0008884476683306275,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3806.0,
      "completions/mean_length": 598.5826416015625,
      "completions/mean_terminated_length": 539.0352172851562,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 0.5412656751239429,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0038,
      "num_tokens": 33471427.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.24194666743278503,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608329772949,
      "step": 58
    },
    {
      "clip_ratio/high_max": 0.0015568905928375898,
      "clip_ratio/high_mean": 0.0004729019640308252,
      "clip_ratio/low_mean": 0.00026004129472312343,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007329432628466748,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3615.0,
      "completions/mean_length": 582.3270263671875,
      "completions/mean_terminated_length": 518.4420166015625,
      "completions/min_length": 88.0,
      "completions/min_terminated_length": 88.0,
      "epoch": 0.5505978419364246,
      "grad_norm": 0.1474609375,
      "learning_rate": 1e-06,
      "loss": -0.0135,
      "num_tokens": 34021800.0,
      "reward": 0.543526828289032,
      "reward_std": 0.208427295088768,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 59
    },
    {
      "clip_ratio/high_max": 0.0015631410460628103,
      "clip_ratio/high_mean": 0.0004982794935131096,
      "clip_ratio/low_mean": 0.0003381057590559067,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008363852512047742,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3014.0,
      "completions/mean_length": 567.2801513671875,
      "completions/mean_terminated_length": 519.3789672851562,
      "completions/min_length": 98.0,
      "completions/min_terminated_length": 98.0,
      "epoch": 0.5599300087489064,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0014,
      "num_tokens": 34565939.0,
      "reward": 0.546875,
      "reward_std": 0.22668297588825226,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 60
    },
    {
      "clip_ratio/high_max": 0.0014611678398068761,
      "clip_ratio/high_mean": 0.0004804958073236776,
      "clip_ratio/low_mean": 0.00039822009489398624,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008787158944869589,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3113.0,
      "completions/mean_length": 670.9542846679688,
      "completions/mean_terminated_length": 588.7531127929688,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "epoch": 0.5692621755613881,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": -0.0028,
      "num_tokens": 35165122.0,
      "reward": 0.5078125,
      "reward_std": 0.2520155608654022,
      "rewards/verify_math_reward/mean": 0.5078125,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 61
    },
    {
      "clip_ratio/high_max": 0.0016026523899199674,
      "clip_ratio/high_mean": 0.0004569236366478435,
      "clip_ratio/low_mean": 0.00040043612898443826,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008573597569920821,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3630.0,
      "completions/mean_length": 640.9319458007812,
      "completions/mean_terminated_length": 590.0645141601562,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 0.57859434237387,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.014,
      "num_tokens": 35770117.0,
      "reward": 0.4843750298023224,
      "reward_std": 0.28068071603775024,
      "rewards/verify_math_reward/mean": 0.484375,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 62
    },
    {
      "clip_ratio/high_max": 0.0015333942537836265,
      "clip_ratio/high_mean": 0.000492742325150175,
      "clip_ratio/low_mean": 0.00027141847272105224,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007641608012818324,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2655.0,
      "completions/mean_length": 582.247802734375,
      "completions/mean_terminated_length": 530.5164184570312,
      "completions/min_length": 35.0,
      "completions/min_terminated_length": 35.0,
      "epoch": 0.5879265091863517,
      "grad_norm": 0.1533203125,
      "learning_rate": 1e-06,
      "loss": -0.0099,
      "num_tokens": 36317219.0,
      "reward": 0.578125,
      "reward_std": 0.24972325563430786,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 63
    },
    {
      "clip_ratio/high_max": 0.0017877031059470028,
      "clip_ratio/high_mean": 0.0005220111013386486,
      "clip_ratio/low_mean": 0.00028729273242333875,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000809303836831532,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3996.0,
      "completions/mean_length": 646.6663208007812,
      "completions/mean_terminated_length": 563.8822631835938,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "epoch": 0.5972586759988335,
      "grad_norm": 0.15234375,
      "learning_rate": 1e-06,
      "loss": -0.013,
      "num_tokens": 36900384.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.2383444458246231,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763264656067,
      "step": 64
    },
    {
      "clip_ratio/high_max": 0.0015150777999224374,
      "clip_ratio/high_mean": 0.00041612504696786345,
      "clip_ratio/low_mean": 0.00030822398287000397,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007243490281325649,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3929.0,
      "completions/mean_length": 616.5736694335938,
      "completions/mean_terminated_length": 581.2694091796875,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 0.6065908428113153,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0026,
      "num_tokens": 37513650.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.22654101252555847,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 65
    },
    {
      "clip_ratio/high_max": 0.001474865075579146,
      "clip_ratio/high_mean": 0.0003922959596138753,
      "clip_ratio/low_mean": 0.00031516578314949584,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007074617333273636,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3326.0,
      "completions/mean_length": 567.1607666015625,
      "completions/mean_terminated_length": 515.2072143554688,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 0.615923009623797,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0003,
      "num_tokens": 38067114.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.20436903834342957,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689781665802,
      "step": 66
    },
    {
      "clip_ratio/high_max": 0.0016830643362482078,
      "clip_ratio/high_mean": 0.00047929826359904837,
      "clip_ratio/low_mean": 0.0003632792582948241,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008425775249634171,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2716.0,
      "completions/mean_length": 655.7623291015625,
      "completions/mean_terminated_length": 609.062255859375,
      "completions/min_length": 91.0,
      "completions/min_terminated_length": 91.0,
      "epoch": 0.6252551764362788,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0102,
      "num_tokens": 38701197.0,
      "reward": 0.478794664144516,
      "reward_std": 0.25655919313430786,
      "rewards/verify_math_reward/mean": 0.4787946343421936,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 67
    },
    {
      "clip_ratio/high_max": 0.0013804803229504614,
      "clip_ratio/high_mean": 0.0003894913461408578,
      "clip_ratio/low_mean": 0.0002950324698076656,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006845238103778684,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3800.0,
      "completions/mean_length": 567.6183471679688,
      "completions/mean_terminated_length": 543.8314819335938,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 0.6345873432487605,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0097,
      "num_tokens": 39268831.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.20478273928165436,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 68
    },
    {
      "clip_ratio/high_max": 0.001562234175253252,
      "clip_ratio/high_mean": 0.0004804975578736048,
      "clip_ratio/low_mean": 0.00032069702626813523,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008011945865291636,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3845.0,
      "completions/mean_length": 655.90625,
      "completions/mean_terminated_length": 581.3773803710938,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 0.6439195100612424,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0025,
      "num_tokens": 39869067.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.25821453332901,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 69
    },
    {
      "clip_ratio/high_max": 0.0017730849103827495,
      "clip_ratio/high_mean": 0.000578458475501975,
      "clip_ratio/low_mean": 0.00031695155996658286,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008954100294431555,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3794.0,
      "completions/mean_length": 615.0145263671875,
      "completions/mean_terminated_length": 571.748046875,
      "completions/min_length": 74.0,
      "completions/min_terminated_length": 74.0,
      "epoch": 0.6532516768737241,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0011,
      "num_tokens": 40465104.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.2776102125644684,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943605065345764,
      "step": 70
    },
    {
      "clip_ratio/high_max": 0.0014781976860831492,
      "clip_ratio/high_mean": 0.0004499135351352379,
      "clip_ratio/low_mean": 0.00035473589559842367,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008046494249356329,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2857.0,
      "completions/mean_length": 590.0324096679688,
      "completions/mean_terminated_length": 554.4588012695312,
      "completions/min_length": 88.0,
      "completions/min_terminated_length": 88.0,
      "epoch": 0.6625838436862059,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0085,
      "num_tokens": 41056493.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.2310115545988083,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 71
    },
    {
      "clip_ratio/high_max": 0.0014062314075999893,
      "clip_ratio/high_mean": 0.00042140097184528713,
      "clip_ratio/low_mean": 0.00031373776926102437,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007351387566814083,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3267.0,
      "completions/mean_length": 617.328125,
      "completions/mean_terminated_length": 574.0903930664062,
      "completions/min_length": 80.0,
      "completions/min_terminated_length": 80.0,
      "epoch": 0.6719160104986877,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0066,
      "num_tokens": 41662003.0,
      "reward": 0.4832589626312256,
      "reward_std": 0.2109428197145462,
      "rewards/verify_math_reward/mean": 0.4832589328289032,
      "rewards/verify_math_reward/std": 0.4999987483024597,
      "step": 72
    },
    {
      "clip_ratio/high_max": 0.0015306857339965063,
      "clip_ratio/high_mean": 0.00042900978519355704,
      "clip_ratio/low_mean": 0.00036409031235962175,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007931000982352998,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2582.0,
      "completions/mean_length": 622.6986694335938,
      "completions/mean_terminated_length": 551.4920654296875,
      "completions/min_length": 29.0,
      "completions/min_terminated_length": 29.0,
      "epoch": 0.6812481773111695,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0017,
      "num_tokens": 42242989.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.22252734005451202,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 73
    },
    {
      "clip_ratio/high_max": 0.0016175444525288185,
      "clip_ratio/high_mean": 0.0004853530490436242,
      "clip_ratio/low_mean": 0.00029653505043825135,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007818881131242961,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3468.0,
      "completions/mean_length": 656.9174194335938,
      "completions/mean_terminated_length": 582.4104614257812,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 0.6905803441236512,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.004,
      "num_tokens": 42843467.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.2449902892112732,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 74
    },
    {
      "clip_ratio/high_max": 0.0015286737470887601,
      "clip_ratio/high_mean": 0.00041930390784727933,
      "clip_ratio/low_mean": 0.00022943636531636002,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006487402688435395,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3826.0,
      "completions/mean_length": 641.3449096679688,
      "completions/mean_terminated_length": 590.4835815429688,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 0.6999125109361329,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": -0.0038,
      "num_tokens": 43449928.0,
      "reward": 0.512276828289032,
      "reward_std": 0.18021291494369507,
      "rewards/verify_math_reward/mean": 0.5122767686843872,
      "rewards/verify_math_reward/std": 0.500128448009491,
      "step": 75
    },
    {
      "clip_ratio/high_max": 0.0014769398867429118,
      "clip_ratio/high_mean": 0.0004729157137717266,
      "clip_ratio/low_mean": 0.0003285410001581113,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008014567233658454,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3077.0,
      "completions/mean_length": 662.3928833007812,
      "completions/mean_terminated_length": 599.963623046875,
      "completions/min_length": 49.0,
      "completions/min_terminated_length": 49.0,
      "epoch": 0.7092446777486148,
      "grad_norm": 0.1455078125,
      "learning_rate": 1e-06,
      "loss": 0.0039,
      "num_tokens": 44071136.0,
      "reward": 0.4810267984867096,
      "reward_std": 0.2617064416408539,
      "rewards/verify_math_reward/mean": 0.4810267984867096,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 76
    },
    {
      "clip_ratio/high_max": 0.0018359375881118467,
      "clip_ratio/high_mean": 0.0005651184619637206,
      "clip_ratio/low_mean": 0.0004314097777751158,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009965282588382252,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4045.0,
      "completions/mean_length": 630.4140625,
      "completions/mean_terminated_length": 559.3656005859375,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "epoch": 0.7185768445610965,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": 0.0077,
      "num_tokens": 44653883.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.2591606676578522,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 77
    },
    {
      "clip_ratio/high_max": 0.0014776905736653134,
      "clip_ratio/high_mean": 0.00043647162488014146,
      "clip_ratio/low_mean": 0.000357973758582375,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007944453647041883,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2043.0,
      "completions/mean_length": 583.5513916015625,
      "completions/mean_terminated_length": 539.893798828125,
      "completions/min_length": 85.0,
      "completions/min_terminated_length": 85.0,
      "epoch": 0.7279090113735783,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0092,
      "num_tokens": 45217601.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.2161722332239151,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606892466545105,
      "step": 78
    },
    {
      "clip_ratio/high_max": 0.0017077371121558826,
      "clip_ratio/high_mean": 0.000597816728259204,
      "clip_ratio/low_mean": 0.00041782399648582214,
      "clip_ratio/low_min": 8.30675162433181e-06,
      "clip_ratio/region_mean": 0.0010156407151953317,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2142.0,
      "completions/mean_length": 576.6707763671875,
      "completions/mean_terminated_length": 532.9276733398438,
      "completions/min_length": 39.0,
      "completions/min_terminated_length": 39.0,
      "epoch": 0.73724117818606,
      "grad_norm": 0.150390625,
      "learning_rate": 1e-06,
      "loss": -0.0014,
      "num_tokens": 45778330.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.2764067053794861,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 79
    },
    {
      "clip_ratio/high_max": 0.0015504330685871537,
      "clip_ratio/high_mean": 0.00042385151959933864,
      "clip_ratio/low_mean": 0.0004107067993572855,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008345583219124819,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4092.0,
      "completions/mean_length": 574.9442138671875,
      "completions/mean_terminated_length": 531.1796875,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 0.7465733449985419,
      "grad_norm": 0.1455078125,
      "learning_rate": 1e-06,
      "loss": 0.0116,
      "num_tokens": 46338472.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.27185171842575073,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317117214203,
      "step": 80
    },
    {
      "clip_ratio/high_max": 0.0018751709330899757,
      "clip_ratio/high_mean": 0.000534818087999156,
      "clip_ratio/low_mean": 0.00041298053474747576,
      "clip_ratio/low_min": 1.201923078042455e-05,
      "clip_ratio/region_mean": 0.0009477986250203685,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1882.0,
      "completions/mean_length": 644.7142944335938,
      "completions/mean_terminated_length": 593.902587890625,
      "completions/min_length": 32.0,
      "completions/min_terminated_length": 32.0,
      "epoch": 0.7559055118110236,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": 0.0086,
      "num_tokens": 46958704.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.2724490761756897,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 81
    },
    {
      "clip_ratio/high_max": 0.001774657021087478,
      "clip_ratio/high_mean": 0.0005811095738863514,
      "clip_ratio/low_mean": 0.00030802302762822364,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000889132607881038,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4069.0,
      "completions/mean_length": 642.3326416015625,
      "completions/mean_terminated_length": 563.481689453125,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 0.7652376786235054,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0095,
      "num_tokens": 47546482.0,
      "reward": 0.543526828289032,
      "reward_std": 0.22124601900577545,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 82
    },
    {
      "clip_ratio/high_max": 0.0017228152819370735,
      "clip_ratio/high_mean": 0.0005372254679514299,
      "clip_ratio/low_mean": 0.0002621714993438218,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007993969729795936,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2364.0,
      "completions/mean_length": 585.703125,
      "completions/mean_terminated_length": 550.0856323242188,
      "completions/min_length": 79.0,
      "completions/min_terminated_length": 79.0,
      "epoch": 0.7745698454359872,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0122,
      "num_tokens": 48119624.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.21981680393218994,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 83
    },
    {
      "clip_ratio/high_max": 0.0015164682645263383,
      "clip_ratio/high_mean": 0.0003703758887922959,
      "clip_ratio/low_mean": 0.00025774244113563327,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006281183268583845,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4013.0,
      "completions/mean_length": 631.4330444335938,
      "completions/mean_terminated_length": 584.4027099609375,
      "completions/min_length": 55.0,
      "completions/min_terminated_length": 55.0,
      "epoch": 0.7839020122484689,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": -0.0057,
      "num_tokens": 48731852.0,
      "reward": 0.4508928656578064,
      "reward_std": 0.20778609812259674,
      "rewards/verify_math_reward/mean": 0.4508928656578064,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 84
    },
    {
      "clip_ratio/high_max": 0.001654619662076584,
      "clip_ratio/high_mean": 0.0005539177609534818,
      "clip_ratio/low_mean": 0.00027753965173360484,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008314574142787023,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3794.0,
      "completions/mean_length": 609.1239013671875,
      "completions/mean_terminated_length": 549.7559814453125,
      "completions/min_length": 29.0,
      "completions/min_terminated_length": 29.0,
      "epoch": 0.7932341790609507,
      "grad_norm": 0.1435546875,
      "learning_rate": 1e-06,
      "loss": -0.0047,
      "num_tokens": 49305915.0,
      "reward": 0.520089328289032,
      "reward_std": 0.22800637781620026,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 85
    },
    {
      "clip_ratio/high_max": 0.0014105494983596145,
      "clip_ratio/high_mean": 0.0004022495686513139,
      "clip_ratio/low_mean": 0.00031966669246230595,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007219162589535699,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2128.0,
      "completions/mean_length": 560.828125,
      "completions/mean_terminated_length": 544.975341796875,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 0.8025663458734325,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0073,
      "num_tokens": 49878897.0,
      "reward": 0.5703125,
      "reward_std": 0.2051176279783249,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 86
    },
    {
      "clip_ratio/high_max": 0.0018720411289905314,
      "clip_ratio/high_mean": 0.0005458629669874426,
      "clip_ratio/low_mean": 0.0002696333073117785,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008154962742992211,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2253.0,
      "completions/mean_length": 522.9933471679688,
      "completions/mean_terminated_length": 494.8594055175781,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "epoch": 0.8118985126859143,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": 0.0133,
      "num_tokens": 50399491.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.21879789233207703,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 87
    },
    {
      "clip_ratio/high_max": 0.0016477267345180735,
      "clip_ratio/high_mean": 0.0004959221550961956,
      "clip_ratio/low_mean": 0.00031129847548072576,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008072206387623737,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2472.0,
      "completions/mean_length": 572.3092041015625,
      "completions/mean_terminated_length": 512.314453125,
      "completions/min_length": 15.0,
      "completions/min_terminated_length": 15.0,
      "epoch": 0.821230679498396,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.007,
      "num_tokens": 50930912.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.20921824872493744,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 88
    },
    {
      "clip_ratio/high_max": 0.0017612755818845471,
      "clip_ratio/high_mean": 0.00045294952428776014,
      "clip_ratio/low_mean": 0.0003718053224019968,
      "clip_ratio/low_min": 2.210980892414227e-05,
      "clip_ratio/region_mean": 0.0008247548489634937,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2488.0,
      "completions/mean_length": 598.7467041015625,
      "completions/mean_terminated_length": 555.2779541015625,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 0.8305628463108778,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": -0.008,
      "num_tokens": 51524181.0,
      "reward": 0.5078125,
      "reward_std": 0.21928435564041138,
      "rewards/verify_math_reward/mean": 0.5078125,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 89
    },
    {
      "clip_ratio/high_max": 0.0015908889745333,
      "clip_ratio/high_mean": 0.0004882191981323558,
      "clip_ratio/low_mean": 0.00028090034481920156,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007691195569350384,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2957.0,
      "completions/mean_length": 603.0725708007812,
      "completions/mean_terminated_length": 559.6576538085938,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 0.8398950131233596,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0088,
      "num_tokens": 52107030.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.22804872691631317,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.4999600946903229,
      "step": 90
    },
    {
      "clip_ratio/high_max": 0.0013870225702703465,
      "clip_ratio/high_mean": 0.00045190190371613426,
      "clip_ratio/low_mean": 0.00035744704405260563,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008093489577731816,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3973.0,
      "completions/mean_length": 609.4330444335938,
      "completions/mean_terminated_length": 550.0703735351562,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "epoch": 0.8492271799358414,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0079,
      "num_tokens": 52679602.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.2426195591688156,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 91
    },
    {
      "clip_ratio/high_max": 0.001600240262632724,
      "clip_ratio/high_mean": 0.0005148330510564847,
      "clip_ratio/low_mean": 0.0003026256623570589,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008174586901077419,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2956.0,
      "completions/mean_length": 613.90625,
      "completions/mean_terminated_length": 558.6349487304688,
      "completions/min_length": 56.0,
      "completions/min_terminated_length": 56.0,
      "epoch": 0.8585593467483231,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0042,
      "num_tokens": 53259398.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.21797305345535278,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 92
    },
    {
      "clip_ratio/high_max": 0.0018657794225873658,
      "clip_ratio/high_mean": 0.0006483363099505368,
      "clip_ratio/low_mean": 0.00034770081265378394,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009960370989574585,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3717.0,
      "completions/mean_length": 586.0256958007812,
      "completions/mean_terminated_length": 542.3988647460938,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 0.8678915135608049,
      "grad_norm": 0.1533203125,
      "learning_rate": 1e-06,
      "loss": 0.0216,
      "num_tokens": 53836957.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.2878982126712799,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 93
    },
    {
      "clip_ratio/high_max": 0.0016519532255188096,
      "clip_ratio/high_mean": 0.0005057442658653599,
      "clip_ratio/low_mean": 0.0003273701208854618,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008331143963005161,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4040.0,
      "completions/mean_length": 611.1116333007812,
      "completions/mean_terminated_length": 563.8054809570312,
      "completions/min_length": 74.0,
      "completions/min_terminated_length": 74.0,
      "epoch": 0.8772236803732867,
      "grad_norm": 0.1435546875,
      "learning_rate": 1e-06,
      "loss": 0.0194,
      "num_tokens": 54412113.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.23409229516983032,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763562679291,
      "step": 94
    },
    {
      "clip_ratio/high_max": 0.0016927642309383373,
      "clip_ratio/high_mean": 0.000469393598791612,
      "clip_ratio/low_mean": 0.00029131326095921395,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007607068546349183,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3994.0,
      "completions/mean_length": 648.1741333007812,
      "completions/mean_terminated_length": 589.4710693359375,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 0.8865558471857684,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": -0.012,
      "num_tokens": 55018933.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.20644059777259827,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 95
    },
    {
      "clip_ratio/high_max": 0.0017833485844676034,
      "clip_ratio/high_mean": 0.0006488943522526824,
      "clip_ratio/low_mean": 0.00030729869297374535,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009561930583004141,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2564.0,
      "completions/mean_length": 627.1004638671875,
      "completions/mean_terminated_length": 560.0113525390625,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 0.8958880139982502,
      "grad_norm": 0.1474609375,
      "learning_rate": 1e-06,
      "loss": -0.0016,
      "num_tokens": 55594639.0,
      "reward": 0.5546875,
      "reward_std": 0.26103493571281433,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 96
    },
    {
      "clip_ratio/high_max": 0.0013815107549817185,
      "clip_ratio/high_mean": 0.00044456125920078193,
      "clip_ratio/low_mean": 0.0003650004285873365,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000809561681762716,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3633.0,
      "completions/mean_length": 610.333740234375,
      "completions/mean_terminated_length": 574.9661254882812,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 0.905220180810732,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.0125,
      "num_tokens": 56195930.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.22154095768928528,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943605065345764,
      "step": 97
    },
    {
      "clip_ratio/high_max": 0.0015758289937366499,
      "clip_ratio/high_mean": 0.00048606450809529633,
      "clip_ratio/low_mean": 0.00035013958085983177,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008362040998690645,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3612.0,
      "completions/mean_length": 678.099365234375,
      "completions/mean_terminated_length": 615.9556884765625,
      "completions/min_length": 39.0,
      "completions/min_terminated_length": 39.0,
      "epoch": 0.9145523476232138,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.0026,
      "num_tokens": 56823899.0,
      "reward": 0.4698660969734192,
      "reward_std": 0.225752592086792,
      "rewards/verify_math_reward/mean": 0.4698660671710968,
      "rewards/verify_math_reward/std": 0.49936988949775696,
      "step": 98
    },
    {
      "clip_ratio/high_max": 0.0015351482543337625,
      "clip_ratio/high_mean": 0.0004317303132665984,
      "clip_ratio/low_mean": 0.0002192336081634494,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000650963912903535,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3610.0,
      "completions/mean_length": 640.6105346679688,
      "completions/mean_terminated_length": 577.7852172851562,
      "completions/min_length": 80.0,
      "completions/min_terminated_length": 80.0,
      "epoch": 0.9238845144356955,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0173,
      "num_tokens": 57431566.0,
      "reward": 0.4843750298023224,
      "reward_std": 0.2132733017206192,
      "rewards/verify_math_reward/mean": 0.484375,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 99
    },
    {
      "clip_ratio/high_max": 0.001455231406907842,
      "clip_ratio/high_mean": 0.0004697137283073971,
      "clip_ratio/low_mean": 0.00026407662903693563,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007337903650750377,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2518.0,
      "completions/mean_length": 644.2890625,
      "completions/mean_terminated_length": 569.508544921875,
      "completions/min_length": 71.0,
      "completions/min_terminated_length": 71.0,
      "epoch": 0.9332166812481774,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0143,
      "num_tokens": 58021289.0,
      "reward": 0.53125,
      "reward_std": 0.2083178460597992,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 100
    },
    {
      "clip_ratio/high_max": 0.001401610332322889,
      "clip_ratio/high_mean": 0.0004110899087663711,
      "clip_ratio/low_mean": 0.0003003903148055542,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000711480231530004,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3700.0,
      "completions/mean_length": 611.7701416015625,
      "completions/mean_terminated_length": 560.473388671875,
      "completions/min_length": 53.0,
      "completions/min_terminated_length": 53.0,
      "epoch": 0.9425488480606591,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": -0.0039,
      "num_tokens": 58601707.0,
      "reward": 0.527901828289032,
      "reward_std": 0.21383923292160034,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 101
    },
    {
      "clip_ratio/high_max": 0.0014903666742611676,
      "clip_ratio/high_mean": 0.0004585197335700286,
      "clip_ratio/low_mean": 0.00033919756333489204,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007977173081599176,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3685.0,
      "completions/mean_length": 635.4330444335938,
      "completions/mean_terminated_length": 564.4874877929688,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 0.9518810148731408,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.007,
      "num_tokens": 59197239.0,
      "reward": 0.504464328289032,
      "reward_std": 0.23217298090457916,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5002593398094177,
      "step": 102
    },
    {
      "clip_ratio/high_max": 0.0013767386080871802,
      "clip_ratio/high_mean": 0.00037214821895759087,
      "clip_ratio/low_mean": 0.0003223466443387224,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006944948577256582,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3788.0,
      "completions/mean_length": 686.8438110351562,
      "completions/mean_terminated_length": 624.8590698242188,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 0.9612131816856226,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": -0.0013,
      "num_tokens": 59834579.0,
      "reward": 0.494419664144516,
      "reward_std": 0.22564129531383514,
      "rewards/verify_math_reward/mean": 0.4944196343421936,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 103
    },
    {
      "clip_ratio/high_max": 0.0015710645147919422,
      "clip_ratio/high_mean": 0.00045990343096491415,
      "clip_ratio/low_mean": 0.00040235662572740694,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000862260055328079,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2159.0,
      "completions/mean_length": 611.8058471679688,
      "completions/mean_terminated_length": 544.4208984375,
      "completions/min_length": 86.0,
      "completions/min_terminated_length": 86.0,
      "epoch": 0.9705453484981044,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": -0.0011,
      "num_tokens": 60406069.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.23514607548713684,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.49828118085861206,
      "step": 104
    },
    {
      "clip_ratio/high_max": 0.0015253851961460896,
      "clip_ratio/high_mean": 0.0004695453960721352,
      "clip_ratio/low_mean": 0.0003123652346630479,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007819106272108911,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4033.0,
      "completions/mean_length": 578.4296875,
      "completions/mean_terminated_length": 518.5391845703125,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 0.9798775153105862,
      "grad_norm": 0.146484375,
      "learning_rate": 1e-06,
      "loss": -0.0096,
      "num_tokens": 60950798.0,
      "reward": 0.5100446939468384,
      "reward_std": 0.23822499811649323,
      "rewards/verify_math_reward/mean": 0.5100446343421936,
      "rewards/verify_math_reward/std": 0.5001782774925232,
      "step": 105
    },
    {
      "clip_ratio/high_max": 0.0015600823371642036,
      "clip_ratio/high_mean": 0.00041957787686897063,
      "clip_ratio/low_mean": 0.00031790625700978126,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000737484128876531,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3423.0,
      "completions/mean_length": 594.1082763671875,
      "completions/mean_terminated_length": 550.5819091796875,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 0.9892096821230679,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 61520255.0,
      "reward": 0.5145089626312256,
      "reward_std": 0.2048247903585434,
      "rewards/verify_math_reward/mean": 0.5145089030265808,
      "rewards/verify_math_reward/std": 0.5000685453414917,
      "step": 106
    },
    {
      "clip_ratio/high_max": 0.0016536110906599788,
      "clip_ratio/high_mean": 0.00047350559771075496,
      "clip_ratio/low_mean": 0.0002858078601093439,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007593134505441412,
      "completions/clipped_ratio": 0.017045454545454586,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2737.0,
      "completions/mean_length": 574.446044921875,
      "completions/mean_terminated_length": 513.3786010742188,
      "completions/min_length": 17.0,
      "completions/min_terminated_length": 17.0,
      "epoch": 0.9985418489355498,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0073,
      "num_tokens": 62094613.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.24536897242069244,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756911277771,
      "step": 107
    },
    {
      "clip_ratio/high_max": 0.001530498701868055,
      "clip_ratio/high_mean": 0.00045928891904623015,
      "clip_ratio/low_mean": 0.00035026266925797245,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008095516059256624,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4052.0,
      "completions/mean_length": 585.390625,
      "completions/mean_terminated_length": 553.7635498046875,
      "completions/min_length": 26.0,
      "completions/min_terminated_length": 26.0,
      "epoch": 1.0093321668124817,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": -0.0047,
      "num_tokens": 62676779.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.2416074424982071,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 108
    },
    {
      "clip_ratio/high_max": 0.0019083833612967283,
      "clip_ratio/high_mean": 0.0006770117274754739,
      "clip_ratio/low_mean": 0.00033359393023602024,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001010605651572405,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4018.0,
      "completions/mean_length": 589.703125,
      "completions/mean_terminated_length": 546.1220703125,
      "completions/min_length": 34.0,
      "completions/min_terminated_length": 34.0,
      "epoch": 1.0186643336249634,
      "grad_norm": 0.154296875,
      "learning_rate": 1e-06,
      "loss": 0.0188,
      "num_tokens": 63256545.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.2604754865169525,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 109
    },
    {
      "clip_ratio/high_max": 0.0015690998388890876,
      "clip_ratio/high_mean": 0.0004443408483894018,
      "clip_ratio/low_mean": 0.0003363973024761435,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000780738146204385,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3858.0,
      "completions/mean_length": 654.1785888671875,
      "completions/mean_terminated_length": 579.6123046875,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 1.0279965004374454,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0081,
      "num_tokens": 63857865.0,
      "reward": 0.4988839626312256,
      "reward_std": 0.2183186411857605,
      "rewards/verify_math_reward/mean": 0.4988839328289032,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 110
    },
    {
      "clip_ratio/high_max": 0.0015895372371232952,
      "clip_ratio/high_mean": 0.0004398960209073266,
      "clip_ratio/low_mean": 0.0003045257033136295,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000744421729223177,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2673.0,
      "completions/mean_length": 637.169677734375,
      "completions/mean_terminated_length": 554.15771484375,
      "completions/min_length": 84.0,
      "completions/min_terminated_length": 84.0,
      "epoch": 1.0373286672499271,
      "grad_norm": 0.1494140625,
      "learning_rate": 1e-06,
      "loss": -0.0036,
      "num_tokens": 64447129.0,
      "reward": 0.455357164144516,
      "reward_std": 0.23278027772903442,
      "rewards/verify_math_reward/mean": 0.4553571343421936,
      "rewards/verify_math_reward/std": 0.49828118085861206,
      "step": 111
    },
    {
      "clip_ratio/high_max": 0.001427862858690787,
      "clip_ratio/high_mean": 0.0004775260467795306,
      "clip_ratio/low_mean": 0.0003213918915889735,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007989179357537068,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3104.0,
      "completions/mean_length": 585.0949096679688,
      "completions/mean_terminated_length": 533.4053955078125,
      "completions/min_length": 94.0,
      "completions/min_terminated_length": 94.0,
      "epoch": 1.0466608340624088,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": -0.006,
      "num_tokens": 65012702.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.24092638492584229,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 112
    },
    {
      "clip_ratio/high_max": 0.00150731401390658,
      "clip_ratio/high_mean": 0.0004276625006696122,
      "clip_ratio/low_mean": 0.0003573052471210758,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007849677404010436,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3862.0,
      "completions/mean_length": 602.1049194335938,
      "completions/mean_terminated_length": 558.677978515625,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 1.0559930008748906,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0044,
      "num_tokens": 65597764.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.22913458943367004,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 113
    },
    {
      "clip_ratio/high_max": 0.0013777668355032802,
      "clip_ratio/high_mean": 0.00040216286163285986,
      "clip_ratio/low_mean": 0.00028282253288125503,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006849853864423494,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2109.0,
      "completions/mean_length": 622.2020263671875,
      "completions/mean_terminated_length": 550.9852294921875,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 1.0653251676873725,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0007,
      "num_tokens": 66192977.0,
      "reward": 0.4720982313156128,
      "reward_std": 0.20162531733512878,
      "rewards/verify_math_reward/mean": 0.4720982015132904,
      "rewards/verify_math_reward/std": 0.49949967861175537,
      "step": 114
    },
    {
      "clip_ratio/high_max": 0.0019017514350707643,
      "clip_ratio/high_mean": 0.0005487539110617945,
      "clip_ratio/low_mean": 0.0002849090384415831,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008336629598488798,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3430.0,
      "completions/mean_length": 602.4140625,
      "completions/mean_terminated_length": 550.9796142578125,
      "completions/min_length": 87.0,
      "completions/min_terminated_length": 87.0,
      "epoch": 1.0746573344998542,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.0025,
      "num_tokens": 66767404.0,
      "reward": 0.5256696939468384,
      "reward_std": 0.18926939368247986,
      "rewards/verify_math_reward/mean": 0.5256696343421936,
      "rewards/verify_math_reward/std": 0.4996195137500763,
      "step": 115
    },
    {
      "clip_ratio/high_max": 0.0016422621683886973,
      "clip_ratio/high_mean": 0.0005316739079717081,
      "clip_ratio/low_mean": 0.0003052589680692108,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008369328761546058,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3778.0,
      "completions/mean_length": 596.8504638671875,
      "completions/mean_terminated_length": 553.3582153320312,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 1.083989501312336,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0088,
      "num_tokens": 67344966.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.2368360310792923,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 116
    },
    {
      "clip_ratio/high_max": 0.0015750610418763245,
      "clip_ratio/high_mean": 0.00046872477969372994,
      "clip_ratio/low_mean": 0.00023229588077811059,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000701020672750019,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2539.0,
      "completions/mean_length": 581.203125,
      "completions/mean_terminated_length": 537.5164184570312,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 1.0933216681248177,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0101,
      "num_tokens": 67905788.0,
      "reward": 0.520089328289032,
      "reward_std": 0.23138132691383362,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 117
    },
    {
      "clip_ratio/high_max": 0.0017898867627081927,
      "clip_ratio/high_mean": 0.0006009550015733112,
      "clip_ratio/low_mean": 0.0003076823650189908,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000908637357497355,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3980.0,
      "completions/mean_length": 606.8638916015625,
      "completions/mean_terminated_length": 551.480712890625,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "epoch": 1.1026538349372994,
      "grad_norm": 0.1435546875,
      "learning_rate": 1e-06,
      "loss": -0.0089,
      "num_tokens": 68466546.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.2578386664390564,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 118
    },
    {
      "clip_ratio/high_max": 0.0014992362812336069,
      "clip_ratio/high_mean": 0.00047664243606959644,
      "clip_ratio/low_mean": 0.0003313528115995723,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008079952503976529,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2877.0,
      "completions/mean_length": 570.1998291015625,
      "completions/mean_terminated_length": 514.2346801757812,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 1.1119860017497813,
      "grad_norm": 0.1474609375,
      "learning_rate": 1e-06,
      "loss": -0.001,
      "num_tokens": 68993725.0,
      "reward": 0.590401828289032,
      "reward_std": 0.23255206644535065,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 119
    },
    {
      "clip_ratio/high_max": 0.0017286572456214344,
      "clip_ratio/high_mean": 0.0005849628532814677,
      "clip_ratio/low_mean": 0.00034277511713298736,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009277379631384974,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3482.0,
      "completions/mean_length": 610.125,
      "completions/mean_terminated_length": 570.7810668945312,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "epoch": 1.121318168562263,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 69596077.0,
      "reward": 0.546875,
      "reward_std": 0.2594548761844635,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 120
    },
    {
      "clip_ratio/high_max": 0.0015606451106577879,
      "clip_ratio/high_mean": 0.0004644531281883246,
      "clip_ratio/low_mean": 0.0003220886408143997,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000786541769230098,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3428.0,
      "completions/mean_length": 618.7846069335938,
      "completions/mean_terminated_length": 563.5906982421875,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 1.1306503353747448,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0022,
      "num_tokens": 70190364.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.20136386156082153,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 121
    },
    {
      "clip_ratio/high_max": 0.0019570581998777925,
      "clip_ratio/high_mean": 0.0006037022139935289,
      "clip_ratio/low_mean": 0.00031340639498012024,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009171086157948594,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3824.0,
      "completions/mean_length": 610.0,
      "completions/mean_terminated_length": 546.6181640625,
      "completions/min_length": 43.0,
      "completions/min_terminated_length": 43.0,
      "epoch": 1.1399825021872265,
      "grad_norm": 0.1435546875,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 70749956.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.25292134284973145,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 122
    },
    {
      "clip_ratio/high_max": 0.001493587864388246,
      "clip_ratio/high_mean": 0.0004782370549492043,
      "clip_ratio/low_mean": 0.0003309355058718211,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008091725567282992,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3851.0,
      "completions/mean_length": 619.6027221679688,
      "completions/mean_terminated_length": 580.36572265625,
      "completions/min_length": 9.0,
      "completions/min_terminated_length": 9.0,
      "epoch": 1.1493146689997085,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": -0.0024,
      "num_tokens": 71354248.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.2237972617149353,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 123
    },
    {
      "clip_ratio/high_max": 0.0016810951947263675,
      "clip_ratio/high_mean": 0.0005409228716644066,
      "clip_ratio/low_mean": 0.00027879514937012573,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008197180222850875,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3330.0,
      "completions/mean_length": 607.2957763671875,
      "completions/mean_terminated_length": 555.9331665039062,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 1.1586468358121902,
      "grad_norm": 0.1474609375,
      "learning_rate": 1e-06,
      "loss": 0.0064,
      "num_tokens": 71934649.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.22320246696472168,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 124
    },
    {
      "clip_ratio/high_max": 0.001345407143162447,
      "clip_ratio/high_mean": 0.0003656016708646348,
      "clip_ratio/low_mean": 0.00030714410536347714,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006727457744091225,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4025.0,
      "completions/mean_length": 653.8605346679688,
      "completions/mean_terminated_length": 587.2889404296875,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 1.167979002624672,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0041,
      "num_tokens": 72541220.0,
      "reward": 0.4687500298023224,
      "reward_std": 0.1849105805158615,
      "rewards/verify_math_reward/mean": 0.46875,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 125
    },
    {
      "clip_ratio/high_max": 0.0015350585535998107,
      "clip_ratio/high_mean": 0.00047525672493975435,
      "clip_ratio/low_mean": 0.0002555170239020299,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007307737505470868,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1873.0,
      "completions/mean_length": 559.700927734375,
      "completions/mean_terminated_length": 543.8430786132812,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 1.1773111694371536,
      "grad_norm": 0.11962890625,
      "learning_rate": 1e-06,
      "loss": -0.0021,
      "num_tokens": 73114808.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.2014077603816986,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321895837783813,
      "step": 126
    },
    {
      "clip_ratio/high_max": 0.0018995921382156666,
      "clip_ratio/high_mean": 0.0005328663756927199,
      "clip_ratio/low_mean": 0.000282887822208977,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008157542024491704,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3492.0,
      "completions/mean_length": 575.5703125,
      "completions/mean_terminated_length": 543.854736328125,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 1.1866433362496354,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0165,
      "num_tokens": 73680351.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.20970863103866577,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 127
    },
    {
      "clip_ratio/high_max": 0.0015654981434636284,
      "clip_ratio/high_mean": 0.00047197203639370855,
      "clip_ratio/low_mean": 0.00032296653432695166,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007949385735628312,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3145.0,
      "completions/mean_length": 630.0390625,
      "completions/mean_terminated_length": 563.0068359375,
      "completions/min_length": 12.0,
      "completions/min_terminated_length": 12.0,
      "epoch": 1.1959755030621173,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": 0.0024,
      "num_tokens": 74267154.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.24348537623882294,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943605065345764,
      "step": 128
    },
    {
      "clip_ratio/high_max": 0.0017097643831220921,
      "clip_ratio/high_mean": 0.000537017465376266,
      "clip_ratio/low_mean": 0.00046191234196157893,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009989298032451188,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3274.0,
      "completions/mean_length": 629.2578125,
      "completions/mean_terminated_length": 558.1856689453125,
      "completions/min_length": 2.0,
      "completions/min_terminated_length": 2.0,
      "epoch": 1.205307669874599,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0019,
      "num_tokens": 74850329.0,
      "reward": 0.520089328289032,
      "reward_std": 0.2848559021949768,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 129
    },
    {
      "clip_ratio/high_max": 0.0014128207740213838,
      "clip_ratio/high_mean": 0.00044809176881699386,
      "clip_ratio/low_mean": 0.0004100578944417066,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008581496549595613,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3684.0,
      "completions/mean_length": 674.7042846679688,
      "completions/mean_terminated_length": 592.5931396484375,
      "completions/min_length": 49.0,
      "completions/min_terminated_length": 49.0,
      "epoch": 1.2146398366870808,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0156,
      "num_tokens": 75461992.0,
      "reward": 0.463169664144516,
      "reward_std": 0.22529610991477966,
      "rewards/verify_math_reward/mean": 0.4631696343421936,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 130
    },
    {
      "clip_ratio/high_max": 0.00170088264712831,
      "clip_ratio/high_mean": 0.0005455831060316996,
      "clip_ratio/low_mean": 0.00022974602575231984,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007753291392873507,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4091.0,
      "completions/mean_length": 547.3928833007812,
      "completions/mean_terminated_length": 523.4696655273438,
      "completions/min_length": 75.0,
      "completions/min_terminated_length": 75.0,
      "epoch": 1.2239720034995625,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.0008,
      "num_tokens": 76013032.0,
      "reward": 0.582589328289032,
      "reward_std": 0.2192518711090088,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.493407279253006,
      "step": 131
    },
    {
      "clip_ratio/high_max": 0.0014436859664783697,
      "clip_ratio/high_mean": 0.0004139404118177481,
      "clip_ratio/low_mean": 0.0003627363410032558,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007766767539578723,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3784.0,
      "completions/mean_length": 575.0123291015625,
      "completions/mean_terminated_length": 531.2485961914062,
      "completions/min_length": 86.0,
      "completions/min_terminated_length": 86.0,
      "epoch": 1.2333041703120444,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": 0.0039,
      "num_tokens": 76568683.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.21838393807411194,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 132
    },
    {
      "clip_ratio/high_max": 0.0016194839154195506,
      "clip_ratio/high_mean": 0.0004929634028485452,
      "clip_ratio/low_mean": 0.0002613802015503097,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007543436026935524,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3628.0,
      "completions/mean_length": 588.1283569335938,
      "completions/mean_terminated_length": 536.4835815429688,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 1.2426363371245261,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0057,
      "num_tokens": 77133270.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.20613820850849152,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 133
    },
    {
      "clip_ratio/high_max": 0.0018116976552846609,
      "clip_ratio/high_mean": 0.0005805711366519972,
      "clip_ratio/low_mean": 0.00030581744613300543,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008863885868777288,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2496.0,
      "completions/mean_length": 585.453125,
      "completions/mean_terminated_length": 537.7986450195312,
      "completions/min_length": 87.0,
      "completions/min_terminated_length": 87.0,
      "epoch": 1.2519685039370079,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 77689948.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.23405978083610535,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 134
    },
    {
      "clip_ratio/high_max": 0.0019365234938959475,
      "clip_ratio/high_mean": 0.0006199660264201157,
      "clip_ratio/low_mean": 0.00035826957866902376,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009782355946299504,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3678.0,
      "completions/mean_length": 568.747802734375,
      "completions/mean_terminated_length": 500.5301208496094,
      "completions/min_length": 6.0,
      "completions/min_terminated_length": 6.0,
      "epoch": 1.2613006707494896,
      "grad_norm": 0.162109375,
      "learning_rate": 1e-06,
      "loss": -0.0084,
      "num_tokens": 78213930.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.2616020143032074,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 135
    },
    {
      "clip_ratio/high_max": 0.0016102466006486793,
      "clip_ratio/high_mean": 0.00047373598295052943,
      "clip_ratio/low_mean": 0.00022196987777078903,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006957058722036891,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3942.0,
      "completions/mean_length": 569.9006958007812,
      "completions/mean_terminated_length": 526.073486328125,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 1.2706328375619713,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 78771105.0,
      "reward": 0.546875,
      "reward_std": 0.1892266422510147,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 136
    },
    {
      "clip_ratio/high_max": 0.0017010311821650248,
      "clip_ratio/high_mean": 0.0004654307119835721,
      "clip_ratio/low_mean": 0.00017142242313639144,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006368531379621345,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3878.0,
      "completions/mean_length": 637.5770263671875,
      "completions/mean_terminated_length": 562.6510620117188,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 1.2799650043744533,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0038,
      "num_tokens": 79350774.0,
      "reward": 0.559151828289032,
      "reward_std": 0.19730672240257263,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 137
    },
    {
      "clip_ratio/high_max": 0.001485254843828443,
      "clip_ratio/high_mean": 0.0004001565591806866,
      "clip_ratio/low_mean": 0.00037924269122413534,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007793992515416903,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3942.0,
      "completions/mean_length": 625.6027221679688,
      "completions/mean_terminated_length": 562.5045166015625,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 1.289297171186935,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0051,
      "num_tokens": 79938090.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.21222344040870667,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 138
    },
    {
      "clip_ratio/high_max": 0.001471151117584668,
      "clip_ratio/high_mean": 0.0004658673462927254,
      "clip_ratio/low_mean": 0.0003632058278526529,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008290731775559834,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3431.0,
      "completions/mean_length": 619.0535888671875,
      "completions/mean_terminated_length": 571.855224609375,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 1.2986293379994167,
      "grad_norm": 0.146484375,
      "learning_rate": 1e-06,
      "loss": 0.0004,
      "num_tokens": 80539394.0,
      "reward": 0.4676339626312256,
      "reward_std": 0.24558056890964508,
      "rewards/verify_math_reward/mean": 0.4676339328289032,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 139
    },
    {
      "clip_ratio/high_max": 0.0013791911542284652,
      "clip_ratio/high_mean": 0.00041825788878213643,
      "clip_ratio/low_mean": 0.00029218789666174416,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007104457718014601,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2577.0,
      "completions/mean_length": 654.0625,
      "completions/mean_terminated_length": 579.4937133789062,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 1.3079615048118984,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0074,
      "num_tokens": 81140666.0,
      "reward": 0.4754464626312256,
      "reward_std": 0.21421971917152405,
      "rewards/verify_math_reward/mean": 0.4754464328289032,
      "rewards/verify_math_reward/std": 0.4996756315231323,
      "step": 140
    },
    {
      "clip_ratio/high_max": 0.0015901103088253876,
      "clip_ratio/high_mean": 0.0004360975998451977,
      "clip_ratio/low_mean": 0.00036504875345144683,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008011463532966445,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3518.0,
      "completions/mean_length": 657.25,
      "completions/mean_terminated_length": 586.751708984375,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 1.3172936716243804,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0037,
      "num_tokens": 81749762.0,
      "reward": 0.4654017984867096,
      "reward_std": 0.24352674186229706,
      "rewards/verify_math_reward/mean": 0.4654017984867096,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 141
    },
    {
      "clip_ratio/high_max": 0.0012775105051332503,
      "clip_ratio/high_mean": 0.00041183771418218384,
      "clip_ratio/low_mean": 0.0003315282663152175,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007433659852722485,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4037.0,
      "completions/mean_length": 606.578125,
      "completions/mean_terminated_length": 571.1724853515625,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 1.326625838436862,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0075,
      "num_tokens": 82349800.0,
      "reward": 0.504464328289032,
      "reward_std": 0.21294091641902924,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5002593398094177,
      "step": 142
    },
    {
      "clip_ratio/high_max": 0.001707187309875735,
      "clip_ratio/high_mean": 0.0005364659032238706,
      "clip_ratio/low_mean": 0.0002514300375651146,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007878959459048929,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3176.0,
      "completions/mean_length": 652.2745971679688,
      "completions/mean_terminated_length": 569.6251220703125,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 1.3359580052493438,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0009,
      "num_tokens": 82939790.0,
      "reward": 0.486607164144516,
      "reward_std": 0.22218145430088043,
      "rewards/verify_math_reward/mean": 0.4866071343421936,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 143
    },
    {
      "clip_ratio/high_max": 0.0014778714521526126,
      "clip_ratio/high_mean": 0.0004181734707344731,
      "clip_ratio/low_mean": 0.0003226648072995886,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007408382789435564,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2449.0,
      "completions/mean_length": 656.372802734375,
      "completions/mean_terminated_length": 601.7755126953125,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 1.3452901720618256,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0041,
      "num_tokens": 83566772.0,
      "reward": 0.4609375298023224,
      "reward_std": 0.2352951020002365,
      "rewards/verify_math_reward/mean": 0.4609375,
      "rewards/verify_math_reward/std": 0.4987502098083496,
      "step": 144
    },
    {
      "clip_ratio/high_max": 0.002060226055618841,
      "clip_ratio/high_mean": 0.0005598815916982858,
      "clip_ratio/low_mean": 0.0002592910132079851,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008191726058157656,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3251.0,
      "completions/mean_length": 616.4832763671875,
      "completions/mean_terminated_length": 537.042236328125,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 1.3546223388743073,
      "grad_norm": 0.1435546875,
      "learning_rate": 1e-06,
      "loss": -0.0093,
      "num_tokens": 84123877.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.22852841019630432,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 145
    },
    {
      "clip_ratio/high_max": 0.0016651964615448378,
      "clip_ratio/high_mean": 0.0005243159653218754,
      "clip_ratio/low_mean": 0.00037592443391076813,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009002403958220384,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4095.0,
      "completions/mean_length": 631.380615234375,
      "completions/mean_terminated_length": 572.3916015625,
      "completions/min_length": 91.0,
      "completions/min_terminated_length": 91.0,
      "epoch": 1.3639545056867892,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0021,
      "num_tokens": 84731538.0,
      "reward": 0.527901828289032,
      "reward_std": 0.23349706828594208,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 146
    },
    {
      "clip_ratio/high_max": 0.001918751197081292,
      "clip_ratio/high_mean": 0.0005806725478123553,
      "clip_ratio/low_mean": 0.00036979717219765007,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009504697172815213,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3948.0,
      "completions/mean_length": 659.8660888671875,
      "completions/mean_terminated_length": 589.4214477539062,
      "completions/min_length": 71.0,
      "completions/min_terminated_length": 71.0,
      "epoch": 1.373286672499271,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0037,
      "num_tokens": 85336658.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.24296332895755768,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 147
    },
    {
      "clip_ratio/high_max": 0.0018146254715247778,
      "clip_ratio/high_mean": 0.0006021163496825466,
      "clip_ratio/low_mean": 0.0003653205460523168,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000967436893915874,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3292.0,
      "completions/mean_length": 639.5614013671875,
      "completions/mean_terminated_length": 580.7117309570312,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 1.3826188393117527,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0015,
      "num_tokens": 85931713.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.24123017489910126,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 148
    },
    {
      "clip_ratio/high_max": 0.001473166490541189,
      "clip_ratio/high_mean": 0.0004518850794283935,
      "clip_ratio/low_mean": 0.000227511869979935,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006793969478167128,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4076.0,
      "completions/mean_length": 597.7131958007812,
      "completions/mean_terminated_length": 534.10791015625,
      "completions/min_length": 71.0,
      "completions/min_terminated_length": 71.0,
      "epoch": 1.3919510061242344,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0084,
      "num_tokens": 86486352.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.18708907067775726,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 149
    },
    {
      "clip_ratio/high_max": 0.0018019757117144763,
      "clip_ratio/high_mean": 0.0005032439592014271,
      "clip_ratio/low_mean": 0.0003022702036332703,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008055141565819213,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3827.0,
      "completions/mean_length": 600.8449096679688,
      "completions/mean_terminated_length": 545.3662109375,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 1.4012831729367163,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": -0.0118,
      "num_tokens": 87063621.0,
      "reward": 0.578125,
      "reward_std": 0.19276241958141327,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 150
    },
    {
      "clip_ratio/high_max": 0.001472546028708166,
      "clip_ratio/high_mean": 0.00042694759076766786,
      "clip_ratio/low_mean": 0.00031916322961933474,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007461108234565472,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3750.0,
      "completions/mean_length": 638.6652221679688,
      "completions/mean_terminated_length": 563.7628173828125,
      "completions/min_length": 61.0,
      "completions/min_terminated_length": 61.0,
      "epoch": 1.410615339749198,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.009,
      "num_tokens": 87655241.0,
      "reward": 0.504464328289032,
      "reward_std": 0.21534626185894012,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5002593398094177,
      "step": 151
    },
    {
      "clip_ratio/high_max": 0.0015102116994967218,
      "clip_ratio/high_mean": 0.00044700323132929043,
      "clip_ratio/low_mean": 0.0003898030786331219,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008368063117814017,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2332.0,
      "completions/mean_length": 560.7545166015625,
      "completions/mean_terminated_length": 536.9213256835938,
      "completions/min_length": 35.0,
      "completions/min_terminated_length": 35.0,
      "epoch": 1.4199475065616798,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0186,
      "num_tokens": 88217581.0,
      "reward": 0.59375,
      "reward_std": 0.23653365671634674,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 152
    },
    {
      "clip_ratio/high_max": 0.001383383129905269,
      "clip_ratio/high_mean": 0.000401061109641887,
      "clip_ratio/low_mean": 0.00030080922033448587,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007018703408903093,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2550.0,
      "completions/mean_length": 592.9029541015625,
      "completions/mean_terminated_length": 533.2588500976562,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 1.4292796733741615,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0074,
      "num_tokens": 88776606.0,
      "reward": 0.5301339626312256,
      "reward_std": 0.19730813801288605,
      "rewards/verify_math_reward/mean": 0.5301339030265808,
      "rewards/verify_math_reward/std": 0.49936985969543457,
      "step": 153
    },
    {
      "clip_ratio/high_max": 0.0017530759469082113,
      "clip_ratio/high_mean": 0.0005233436927483126,
      "clip_ratio/low_mean": 0.0002847095665856614,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008080532716121525,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1682.0,
      "completions/mean_length": 575.3136596679688,
      "completions/mean_terminated_length": 539.5907592773438,
      "completions/min_length": 43.0,
      "completions/min_terminated_length": 43.0,
      "epoch": 1.4386118401866432,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.011,
      "num_tokens": 89337751.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.2180151343345642,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 154
    },
    {
      "clip_ratio/high_max": 0.0014256071099225665,
      "clip_ratio/high_mean": 0.0005033377417476004,
      "clip_ratio/low_mean": 0.00025389279960563726,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007572305385110667,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2283.0,
      "completions/mean_length": 578.6049194335938,
      "completions/mean_terminated_length": 514.6522827148438,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 1.4479440069991252,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0092,
      "num_tokens": 89874973.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.2219550609588623,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 155
    },
    {
      "clip_ratio/high_max": 0.0016124090470839292,
      "clip_ratio/high_mean": 0.00046968579681561096,
      "clip_ratio/low_mean": 0.0003855286004181835,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008552144081477309,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2753.0,
      "completions/mean_length": 560.0335083007812,
      "completions/mean_terminated_length": 528.1779174804688,
      "completions/min_length": 29.0,
      "completions/min_terminated_length": 29.0,
      "epoch": 1.457276173811607,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0048,
      "num_tokens": 90433299.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.22064054012298584,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 156
    },
    {
      "clip_ratio/high_max": 0.001310938184360566,
      "clip_ratio/high_mean": 0.0003792753468587762,
      "clip_ratio/low_mean": 0.00029047973248452763,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006697550852550194,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3763.0,
      "completions/mean_length": 630.8013916015625,
      "completions/mean_terminated_length": 595.6414794921875,
      "completions/min_length": 46.0,
      "completions/min_terminated_length": 46.0,
      "epoch": 1.4666083406240886,
      "grad_norm": 0.115234375,
      "learning_rate": 1e-06,
      "loss": -0.01,
      "num_tokens": 91053137.0,
      "reward": 0.5145089626312256,
      "reward_std": 0.2079797238111496,
      "rewards/verify_math_reward/mean": 0.5145089030265808,
      "rewards/verify_math_reward/std": 0.5000685453414917,
      "step": 157
    },
    {
      "clip_ratio/high_max": 0.001303116920098546,
      "clip_ratio/high_mean": 0.00037509429557758267,
      "clip_ratio/low_mean": 0.00026581576435091847,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006409100451492122,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3264.0,
      "completions/mean_length": 611.3939819335938,
      "completions/mean_terminated_length": 572.0643310546875,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 1.4759405074365703,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.005,
      "num_tokens": 91650842.0,
      "reward": 0.543526828289032,
      "reward_std": 0.20298220217227936,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 158
    },
    {
      "clip_ratio/high_max": 0.0016756560808062204,
      "clip_ratio/high_mean": 0.0005686681888619205,
      "clip_ratio/low_mean": 0.0002220531608827514,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007907213466751273,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2831.0,
      "completions/mean_length": 665.8783569335938,
      "completions/mean_terminated_length": 587.5650634765625,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "epoch": 1.4852726742490523,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": -0.0064,
      "num_tokens": 92244093.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.2174845188856125,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763562679291,
      "step": 159
    },
    {
      "clip_ratio/high_max": 0.0015577739741274854,
      "clip_ratio/high_mean": 0.00045788256193191046,
      "clip_ratio/low_mean": 0.0003860801249402357,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008439626881227014,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3892.0,
      "completions/mean_length": 618.5301513671875,
      "completions/mean_terminated_length": 567.3329467773438,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 1.494604841061534,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0106,
      "num_tokens": 92835152.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.23759308457374573,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 160
    },
    {
      "clip_ratio/high_max": 0.0016572253407503013,
      "clip_ratio/high_mean": 0.0005490923235811351,
      "clip_ratio/low_mean": 0.0002949228953639249,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008440152068942552,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3117.0,
      "completions/mean_length": 606.3694458007812,
      "completions/mean_terminated_length": 554.9931640625,
      "completions/min_length": 12.0,
      "completions/min_terminated_length": 12.0,
      "epoch": 1.5039370078740157,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0076,
      "num_tokens": 93407603.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.23304016888141632,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 161
    },
    {
      "clip_ratio/high_max": 0.0015902642408036627,
      "clip_ratio/high_mean": 0.0005070419258572656,
      "clip_ratio/low_mean": 0.00037056753353681415,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008776094673521584,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3024.0,
      "completions/mean_length": 660.7957763671875,
      "completions/mean_terminated_length": 570.2921142578125,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 1.5132691746864975,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0035,
      "num_tokens": 94000908.0,
      "reward": 0.5055803656578064,
      "reward_std": 0.2254137545824051,
      "rewards/verify_math_reward/mean": 0.5055803656578064,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 162
    },
    {
      "clip_ratio/high_max": 0.0013241914139143773,
      "clip_ratio/high_mean": 0.0003986500561268258,
      "clip_ratio/low_mean": 0.0004089949568424345,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008076450162661786,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2820.0,
      "completions/mean_length": 588.0491333007812,
      "completions/mean_terminated_length": 552.4554443359375,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 1.5226013414989792,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0089,
      "num_tokens": 94592552.0,
      "reward": 0.4832589626312256,
      "reward_std": 0.25959643721580505,
      "rewards/verify_math_reward/mean": 0.4832589328289032,
      "rewards/verify_math_reward/std": 0.4999987483024597,
      "step": 163
    },
    {
      "clip_ratio/high_max": 0.0015286482203009655,
      "clip_ratio/high_mean": 0.0004840133163952487,
      "clip_ratio/low_mean": 0.0002756354216444379,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007596487298542343,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3384.0,
      "completions/mean_length": 590.3817138671875,
      "completions/mean_terminated_length": 530.6947021484375,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 1.531933508311461,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.0031,
      "num_tokens": 95153582.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.19486860930919647,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 164
    },
    {
      "clip_ratio/high_max": 0.001393048787576845,
      "clip_ratio/high_mean": 0.00045955346922710305,
      "clip_ratio/low_mean": 0.0002616896331346652,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007212431041807577,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3177.0,
      "completions/mean_length": 636.2254638671875,
      "completions/mean_terminated_length": 589.2601928710938,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 1.5412656751239429,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0005,
      "num_tokens": 95766376.0,
      "reward": 0.515625,
      "reward_std": 0.21876581013202667,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 165
    },
    {
      "clip_ratio/high_max": 0.0015330221358453855,
      "clip_ratio/high_mean": 0.0004225006006208787,
      "clip_ratio/low_mean": 0.0003513339688652195,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007738345693724114,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3382.0,
      "completions/mean_length": 524.646240234375,
      "completions/mean_terminated_length": 508.6311950683594,
      "completions/min_length": 80.0,
      "completions/min_terminated_length": 80.0,
      "epoch": 1.5505978419364246,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0108,
      "num_tokens": 96313635.0,
      "reward": 0.5390625,
      "reward_std": 0.18652385473251343,
      "rewards/verify_math_reward/mean": 0.5390625,
      "rewards/verify_math_reward/std": 0.4987502098083496,
      "step": 166
    },
    {
      "clip_ratio/high_max": 0.0018427275372232543,
      "clip_ratio/high_mean": 0.0005930055681346857,
      "clip_ratio/low_mean": 0.0003942071366509481,
      "clip_ratio/low_min": 1.1531365089467727e-05,
      "clip_ratio/region_mean": 0.0009872127111520967,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3141.0,
      "completions/mean_length": 606.59375,
      "completions/mean_terminated_length": 551.2063598632812,
      "completions/min_length": 66.0,
      "completions/min_terminated_length": 66.0,
      "epoch": 1.5599300087489065,
      "grad_norm": 0.1455078125,
      "learning_rate": 1e-06,
      "loss": -0.0067,
      "num_tokens": 96882703.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.27500778436660767,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763264656067,
      "step": 167
    },
    {
      "clip_ratio/high_max": 0.0017362831840728177,
      "clip_ratio/high_mean": 0.0005281108299186599,
      "clip_ratio/low_mean": 0.00038208843841402995,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009101992700379924,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3256.0,
      "completions/mean_length": 630.1395263671875,
      "completions/mean_terminated_length": 587.06103515625,
      "completions/min_length": 66.0,
      "completions/min_terminated_length": 66.0,
      "epoch": 1.5692621755613883,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0029,
      "num_tokens": 97486596.0,
      "reward": 0.527901828289032,
      "reward_std": 0.23364794254302979,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949967861175537,
      "step": 168
    },
    {
      "clip_ratio/high_max": 0.0015097710402187658,
      "clip_ratio/high_mean": 0.00043105074610139127,
      "clip_ratio/low_mean": 0.0002880731638015277,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007191239051280718,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3691.0,
      "completions/mean_length": 589.4308471679688,
      "completions/mean_terminated_length": 553.8511352539062,
      "completions/min_length": 27.0,
      "completions/min_terminated_length": 27.0,
      "epoch": 1.57859434237387,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": -0.0024,
      "num_tokens": 98065662.0,
      "reward": 0.543526828289032,
      "reward_std": 0.2099350243806839,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 169
    },
    {
      "clip_ratio/high_max": 0.0012964018524144194,
      "clip_ratio/high_mean": 0.0004398460700940632,
      "clip_ratio/low_mean": 0.00031049041422193113,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00075033648590761,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4056.0,
      "completions/mean_length": 584.7199096679688,
      "completions/mean_terminated_length": 561.04833984375,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 1.5879265091863517,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": -0.0001,
      "num_tokens": 98654163.0,
      "reward": 0.559151828289032,
      "reward_std": 0.23101112246513367,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 170
    },
    {
      "clip_ratio/high_max": 0.0014608978817705065,
      "clip_ratio/high_mean": 0.0004272782414318499,
      "clip_ratio/low_mean": 0.00038870188473083545,
      "clip_ratio/low_min": 9.882985068543348e-06,
      "clip_ratio/region_mean": 0.0008159801154761226,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2955.0,
      "completions/mean_length": 577.6105346679688,
      "completions/mean_terminated_length": 557.866455078125,
      "completions/min_length": 63.0,
      "completions/min_terminated_length": 63.0,
      "epoch": 1.5972586759988334,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.0077,
      "num_tokens": 99242310.0,
      "reward": 0.5022321939468384,
      "reward_std": 0.23322536051273346,
      "rewards/verify_math_reward/mean": 0.5022321343421936,
      "rewards/verify_math_reward/std": 0.5002742409706116,
      "step": 171
    },
    {
      "clip_ratio/high_max": 0.0016138159080583137,
      "clip_ratio/high_mean": 0.0005044096062647441,
      "clip_ratio/low_mean": 0.00039532564051114605,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000899735243365285,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3262.0,
      "completions/mean_length": 613.7254638671875,
      "completions/mean_terminated_length": 586.3059692382812,
      "completions/min_length": 12.0,
      "completions/min_terminated_length": 12.0,
      "epoch": 1.6065908428113151,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.004,
      "num_tokens": 99855288.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.27092063426971436,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 172
    },
    {
      "clip_ratio/high_max": 0.0017673315305728465,
      "clip_ratio/high_mean": 0.0005731661549361888,
      "clip_ratio/low_mean": 0.0003138328314662431,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000886998975147435,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3660.0,
      "completions/mean_length": 611.2288208007812,
      "completions/mean_terminated_length": 539.7870483398438,
      "completions/min_length": 59.0,
      "completions/min_terminated_length": 59.0,
      "epoch": 1.6159230096237969,
      "grad_norm": 0.1611328125,
      "learning_rate": 1e-06,
      "loss": -0.0062,
      "num_tokens": 100409669.0,
      "reward": 0.5546875,
      "reward_std": 0.2342003732919693,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 173
    },
    {
      "clip_ratio/high_max": 0.0017514068495074753,
      "clip_ratio/high_mean": 0.0005628040185001737,
      "clip_ratio/low_mean": 0.00027705558295565424,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008398595955441124,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4009.0,
      "completions/mean_length": 611.2767944335938,
      "completions/mean_terminated_length": 543.8816528320312,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 1.6252551764362788,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.0078,
      "num_tokens": 100978053.0,
      "reward": 0.53125,
      "reward_std": 0.23773828148841858,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 174
    },
    {
      "clip_ratio/high_max": 0.0016308020103679155,
      "clip_ratio/high_mean": 0.00046576996840030915,
      "clip_ratio/low_mean": 0.0003534856388114349,
      "clip_ratio/low_min": 1.5067502317833714e-05,
      "clip_ratio/region_mean": 0.0008192556115318439,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3960.0,
      "completions/mean_length": 636.96875,
      "completions/mean_terminated_length": 574.0772705078125,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 1.6345873432487605,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0121,
      "num_tokens": 101591609.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.20711390674114227,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 175
    },
    {
      "clip_ratio/high_max": 0.0016254229958576616,
      "clip_ratio/high_mean": 0.0004968349437604047,
      "clip_ratio/low_mean": 0.00037942842868687876,
      "clip_ratio/low_min": 1.242544749402441e-05,
      "clip_ratio/region_mean": 0.0008762633733567782,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3897.0,
      "completions/mean_length": 611.802490234375,
      "completions/mean_terminated_length": 584.3678588867188,
      "completions/min_length": 39.0,
      "completions/min_terminated_length": 39.0,
      "epoch": 1.6439195100612425,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0039,
      "num_tokens": 102201416.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.24957171082496643,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 176
    },
    {
      "clip_ratio/high_max": 0.00159170483766502,
      "clip_ratio/high_mean": 0.0005176980662326969,
      "clip_ratio/low_mean": 0.0003070960899549391,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008247941505032941,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2242.0,
      "completions/mean_length": 618.4230346679688,
      "completions/mean_terminated_length": 559.2134399414062,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 1.6532516768737242,
      "grad_norm": 0.1455078125,
      "learning_rate": 1e-06,
      "loss": 0.0021,
      "num_tokens": 102784699.0,
      "reward": 0.5033482313156128,
      "reward_std": 0.22774632275104523,
      "rewards/verify_math_reward/mean": 0.5033482313156128,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 177
    },
    {
      "clip_ratio/high_max": 0.0018602106811158592,
      "clip_ratio/high_mean": 0.0005304169801547687,
      "clip_ratio/low_mean": 0.00028010148514567845,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008105184706437285,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3690.0,
      "completions/mean_length": 583.4676513671875,
      "completions/mean_terminated_length": 547.8274536132812,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 1.662583843686206,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0057,
      "num_tokens": 103354078.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.22187836468219757,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514806270599365,
      "step": 178
    },
    {
      "clip_ratio/high_max": 0.0012931452702105162,
      "clip_ratio/high_mean": 0.0003741790530966682,
      "clip_ratio/low_mean": 0.0003236346656194655,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006978137143960339,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1951.0,
      "completions/mean_length": 566.6975708007812,
      "completions/mean_terminated_length": 534.9020385742188,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 1.6719160104986877,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0099,
      "num_tokens": 103911255.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.22198784351348877,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796950817108154,
      "step": 179
    },
    {
      "clip_ratio/high_max": 0.0016382071171392454,
      "clip_ratio/high_mean": 0.000511829075321657,
      "clip_ratio/low_mean": 0.00027342408543518104,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007852531634853221,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3362.0,
      "completions/mean_length": 562.0145263671875,
      "completions/mean_terminated_length": 522.1275634765625,
      "completions/min_length": 68.0,
      "completions/min_terminated_length": 68.0,
      "epoch": 1.6812481773111694,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0069,
      "num_tokens": 104460764.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.22436067461967468,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 180
    },
    {
      "clip_ratio/high_max": 0.0016517209041921888,
      "clip_ratio/high_mean": 0.00045418715899359086,
      "clip_ratio/low_mean": 0.00036191883907576994,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008161060031852685,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4069.0,
      "completions/mean_length": 611.5926513671875,
      "completions/mean_terminated_length": 548.23974609375,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 1.690580344123651,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.0009,
      "num_tokens": 105026063.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.21928434073925018,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 181
    },
    {
      "clip_ratio/high_max": 0.0015860793446336174,
      "clip_ratio/high_mean": 0.0004572858007918512,
      "clip_ratio/low_mean": 0.0003717205906923482,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000829006378808117,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3033.0,
      "completions/mean_length": 659.6339721679688,
      "completions/mean_terminated_length": 605.0884399414062,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 1.6999125109361328,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": -0.0045,
      "num_tokens": 105664255.0,
      "reward": 0.4687500298023224,
      "reward_std": 0.24250829219818115,
      "rewards/verify_math_reward/mean": 0.46875,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 182
    },
    {
      "clip_ratio/high_max": 0.0019195967724954244,
      "clip_ratio/high_mean": 0.0005889438366466493,
      "clip_ratio/low_mean": 0.000331454284719257,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009203981280734297,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2211.0,
      "completions/mean_length": 527.6082763671875,
      "completions/mean_terminated_length": 499.5107116699219,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 1.7092446777486148,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": -0.0011,
      "num_tokens": 106204096.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.23187805712223053,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.49075525999069214,
      "step": 183
    },
    {
      "clip_ratio/high_max": 0.0017383342383254785,
      "clip_ratio/high_mean": 0.0005435834009404061,
      "clip_ratio/low_mean": 0.0003668535971428355,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009104369983106153,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3056.0,
      "completions/mean_length": 607.7489013671875,
      "completions/mean_terminated_length": 576.3232421875,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 1.7185768445610965,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0063,
      "num_tokens": 106808719.0,
      "reward": 0.535714328289032,
      "reward_std": 0.2203042209148407,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 184
    },
    {
      "clip_ratio/high_max": 0.0014295119999587769,
      "clip_ratio/high_mean": 0.0004413099918565422,
      "clip_ratio/low_mean": 0.00035032830066938914,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000791638280134066,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2822.0,
      "completions/mean_length": 650.6908569335938,
      "completions/mean_terminated_length": 559.9209594726562,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "epoch": 1.7279090113735784,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0033,
      "num_tokens": 107383314.0,
      "reward": 0.5145089626312256,
      "reward_std": 0.23653294146060944,
      "rewards/verify_math_reward/mean": 0.5145089030265808,
      "rewards/verify_math_reward/std": 0.5000685453414917,
      "step": 185
    },
    {
      "clip_ratio/high_max": 0.0013785191176793887,
      "clip_ratio/high_mean": 0.000440667439875142,
      "clip_ratio/low_mean": 0.0002983641954870109,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007390316295641242,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3910.0,
      "completions/mean_length": 626.3192138671875,
      "completions/mean_terminated_length": 547.1027221679688,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 1.7372411781860602,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": -0.0053,
      "num_tokens": 107954424.0,
      "reward": 0.4877232313156128,
      "reward_std": 0.22071540355682373,
      "rewards/verify_math_reward/mean": 0.4877232015132904,
      "rewards/verify_math_reward/std": 0.500128448009491,
      "step": 186
    },
    {
      "clip_ratio/high_max": 0.002037179237959208,
      "clip_ratio/high_mean": 0.0006475231461990916,
      "clip_ratio/low_mean": 0.00035991261415802,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010074357742269058,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1821.0,
      "completions/mean_length": 546.9765625,
      "completions/mean_terminated_length": 515.00341796875,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 1.7465733449985419,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0037,
      "num_tokens": 108499491.0,
      "reward": 0.606026828289032,
      "reward_std": 0.2405993491411209,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 187
    },
    {
      "clip_ratio/high_max": 0.0016446320369141176,
      "clip_ratio/high_mean": 0.0004841918025704217,
      "clip_ratio/low_mean": 0.0003864412077518864,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008706330017957953,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4034.0,
      "completions/mean_length": 650.833740234375,
      "completions/mean_terminated_length": 588.1942749023438,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 1.7559055118110236,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.0053,
      "num_tokens": 109108118.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.23860770463943481,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 188
    },
    {
      "clip_ratio/high_max": 0.0017032873129210202,
      "clip_ratio/high_mean": 0.0005462472404360597,
      "clip_ratio/low_mean": 0.0002708622223508428,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008171094632416498,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3971.0,
      "completions/mean_length": 626.6964721679688,
      "completions/mean_terminated_length": 563.6181640625,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 1.7652376786235053,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0067,
      "num_tokens": 109700790.0,
      "reward": 0.5301339626312256,
      "reward_std": 0.2315317690372467,
      "rewards/verify_math_reward/mean": 0.5301339030265808,
      "rewards/verify_math_reward/std": 0.49936988949775696,
      "step": 189
    },
    {
      "clip_ratio/high_max": 0.0016155478788277833,
      "clip_ratio/high_mean": 0.0005073620002349344,
      "clip_ratio/low_mean": 0.0003483386274183431,
      "clip_ratio/low_min": 8.278145287476946e-06,
      "clip_ratio/region_mean": 0.000855700641295698,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3282.0,
      "completions/mean_length": 647.1138916015625,
      "completions/mean_terminated_length": 600.29638671875,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "epoch": 1.774569845435987,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.008,
      "num_tokens": 110316236.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.24089176952838898,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4982811510562897,
      "step": 190
    },
    {
      "clip_ratio/high_max": 0.0015572338061247137,
      "clip_ratio/high_mean": 0.0004307132467147312,
      "clip_ratio/low_mean": 0.0002452737038538544,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006759869578445432,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3457.0,
      "completions/mean_length": 601.9989013671875,
      "completions/mean_terminated_length": 542.5097045898438,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 1.7839020122484688,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": -0.0056,
      "num_tokens": 110884539.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.19325098395347595,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 191
    },
    {
      "clip_ratio/high_max": 0.0017674143418844324,
      "clip_ratio/high_mean": 0.0005331737964979766,
      "clip_ratio/low_mean": 0.00028827472226566897,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008214485351345502,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2948.0,
      "completions/mean_length": 593.4051513671875,
      "completions/mean_terminated_length": 545.858642578125,
      "completions/min_length": 82.0,
      "completions/min_terminated_length": 82.0,
      "epoch": 1.7932341790609507,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": -0.0015,
      "num_tokens": 111450726.0,
      "reward": 0.5379464626312256,
      "reward_std": 0.24709786474704742,
      "rewards/verify_math_reward/mean": 0.5379464030265808,
      "rewards/verify_math_reward/std": 0.4988364279270172,
      "step": 192
    },
    {
      "clip_ratio/high_max": 0.0015050893280204036,
      "clip_ratio/high_mean": 0.0004983497168495887,
      "clip_ratio/low_mean": 0.00032437484799174854,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008227245612033585,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3481.0,
      "completions/mean_length": 666.810302734375,
      "completions/mean_terminated_length": 604.4613647460938,
      "completions/min_length": 35.0,
      "completions/min_terminated_length": 35.0,
      "epoch": 1.8025663458734325,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": -0.0172,
      "num_tokens": 112075148.0,
      "reward": 0.520089328289032,
      "reward_std": 0.2022358477115631,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 193
    },
    {
      "clip_ratio/high_max": 0.001532794936792925,
      "clip_ratio/high_mean": 0.0004692633813192515,
      "clip_ratio/low_mean": 0.00026098032344634703,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000730243698853883,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3893.0,
      "completions/mean_length": 581.3225708007812,
      "completions/mean_terminated_length": 533.6119995117188,
      "completions/min_length": 57.0,
      "completions/min_terminated_length": 57.0,
      "epoch": 1.8118985126859144,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0022,
      "num_tokens": 112632317.0,
      "reward": 0.546875,
      "reward_std": 0.18945668637752533,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 194
    },
    {
      "clip_ratio/high_max": 0.0016100813445518725,
      "clip_ratio/high_mean": 0.0005150434521965508,
      "clip_ratio/low_mean": 0.0002971570513636834,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008122004992401344,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3582.0,
      "completions/mean_length": 605.6127319335938,
      "completions/mean_terminated_length": 546.18505859375,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 1.8212306794983961,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.004,
      "num_tokens": 113197890.0,
      "reward": 0.53125,
      "reward_std": 0.2296549677848816,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 195
    },
    {
      "clip_ratio/high_max": 0.0014897636101522949,
      "clip_ratio/high_mean": 0.00043615922061235324,
      "clip_ratio/low_mean": 0.0003125943557051869,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007487535808650136,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2664.0,
      "completions/mean_length": 611.0,
      "completions/mean_terminated_length": 539.5535278320312,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 1.8305628463108778,
      "grad_norm": 0.11572265625,
      "learning_rate": 1e-06,
      "loss": -0.0146,
      "num_tokens": 113754490.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.19711239635944366,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924395978450775,
      "step": 196
    },
    {
      "clip_ratio/high_max": 0.0015158454298216384,
      "clip_ratio/high_mean": 0.0005011510493204696,
      "clip_ratio/low_mean": 0.0004062542257088353,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009074052768482943,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3622.0,
      "completions/mean_length": 692.9297485351562,
      "completions/mean_terminated_length": 615.2340087890625,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 1.8398950131233596,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0194,
      "num_tokens": 114394243.0,
      "reward": 0.4174107313156128,
      "reward_std": 0.27331990003585815,
      "rewards/verify_math_reward/mean": 0.4174107015132904,
      "rewards/verify_math_reward/std": 0.4934072494506836,
      "step": 197
    },
    {
      "clip_ratio/high_max": 0.0015211136451398488,
      "clip_ratio/high_mean": 0.0004293553620300372,
      "clip_ratio/low_mean": 0.0004139009126902238,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008432562708549085,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3918.0,
      "completions/mean_length": 614.5201416015625,
      "completions/mean_terminated_length": 555.2440795898438,
      "completions/min_length": 77.0,
      "completions/min_terminated_length": 77.0,
      "epoch": 1.8492271799358413,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": -0.0078,
      "num_tokens": 114978949.0,
      "reward": 0.4609375298023224,
      "reward_std": 0.25711795687675476,
      "rewards/verify_math_reward/mean": 0.4609375,
      "rewards/verify_math_reward/std": 0.4987502098083496,
      "step": 198
    },
    {
      "clip_ratio/high_max": 0.0016054617317422526,
      "clip_ratio/high_mean": 0.00046373914187824994,
      "clip_ratio/low_mean": 0.0002626001449925752,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007263392790264334,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3612.0,
      "completions/mean_length": 605.904052734375,
      "completions/mean_terminated_length": 538.4049682617188,
      "completions/min_length": 82.0,
      "completions/min_terminated_length": 82.0,
      "epoch": 1.858559346748323,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 115532775.0,
      "reward": 0.5011160969734192,
      "reward_std": 0.2074911743402481,
      "rewards/verify_math_reward/mean": 0.5011160969734192,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 199
    },
    {
      "clip_ratio/high_max": 0.001363764398774947,
      "clip_ratio/high_mean": 0.0004251047785146511,
      "clip_ratio/low_mean": 0.00025553890270657575,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006806436749684508,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2639.0,
      "completions/mean_length": 614.4765625,
      "completions/mean_terminated_length": 563.2196655273438,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 1.8678915135608047,
      "grad_norm": 0.1171875,
      "learning_rate": 1e-06,
      "loss": 0.0032,
      "num_tokens": 116121490.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.20835243165493011,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865824937820435,
      "step": 200
    },
    {
      "clip_ratio/high_max": 0.001835477736676694,
      "clip_ratio/high_mean": 0.0006160475415981637,
      "clip_ratio/low_mean": 0.0004029053911835945,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001018952926642669,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3816.0,
      "completions/mean_length": 595.8092041015625,
      "completions/mean_terminated_length": 544.2774658203125,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 1.8772236803732867,
      "grad_norm": 0.1435546875,
      "learning_rate": 1e-06,
      "loss": -0.015,
      "num_tokens": 116682703.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.2618962228298187,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.49828118085861206,
      "step": 201
    },
    {
      "clip_ratio/high_max": 0.0018763634798233397,
      "clip_ratio/high_mean": 0.0005957267251233134,
      "clip_ratio/low_mean": 0.00028847546036558924,
      "clip_ratio/low_min": 6.042149834684096e-06,
      "clip_ratio/region_mean": 0.0008842021939017286,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3431.0,
      "completions/mean_length": 673.896240234375,
      "completions/mean_terminated_length": 595.7659301757812,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "epoch": 1.8865558471857684,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": -0.0026,
      "num_tokens": 117284674.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.23686742782592773,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 202
    },
    {
      "clip_ratio/high_max": 0.0015708985192759428,
      "clip_ratio/high_mean": 0.0005060054093064537,
      "clip_ratio/low_mean": 0.0003360165705998952,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008420219883191749,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4055.0,
      "completions/mean_length": 608.3080444335938,
      "completions/mean_terminated_length": 568.943603515625,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 1.8958880139982504,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0024,
      "num_tokens": 117880550.0,
      "reward": 0.494419664144516,
      "reward_std": 0.2561882734298706,
      "rewards/verify_math_reward/mean": 0.4944196343421936,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 203
    },
    {
      "clip_ratio/high_max": 0.0015028491043267422,
      "clip_ratio/high_mean": 0.0005095660935694468,
      "clip_ratio/low_mean": 0.0003117276033890448,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008212936972995522,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3866.0,
      "completions/mean_length": 625.9330444335938,
      "completions/mean_terminated_length": 578.8280639648438,
      "completions/min_length": 90.0,
      "completions/min_terminated_length": 90.0,
      "epoch": 1.905220180810732,
      "grad_norm": 0.1494140625,
      "learning_rate": 1e-06,
      "loss": 0.0157,
      "num_tokens": 118489186.0,
      "reward": 0.535714328289032,
      "reward_std": 0.26035529375076294,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 204
    },
    {
      "clip_ratio/high_max": 0.0013872589634047472,
      "clip_ratio/high_mean": 0.00036737400603215065,
      "clip_ratio/low_mean": 0.0002780808164288828,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006454548242800229,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3114.0,
      "completions/mean_length": 630.53125,
      "completions/mean_terminated_length": 575.5238037109375,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 1.9145523476232138,
      "grad_norm": 0.115234375,
      "learning_rate": 1e-06,
      "loss": -0.004,
      "num_tokens": 119082614.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.18239142000675201,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 205
    },
    {
      "clip_ratio/high_max": 0.0015918045519356383,
      "clip_ratio/high_mean": 0.0004928177258989308,
      "clip_ratio/low_mean": 0.0003009687043231679,
      "clip_ratio/low_min": 9.489826879871543e-06,
      "clip_ratio/region_mean": 0.0007937864347695722,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2512.0,
      "completions/mean_length": 656.5971069335938,
      "completions/mean_terminated_length": 590.0784912109375,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 1.9238845144356955,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0077,
      "num_tokens": 119680917.0,
      "reward": 0.5078125,
      "reward_std": 0.22751741111278534,
      "rewards/verify_math_reward/mean": 0.5078125,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 206
    },
    {
      "clip_ratio/high_max": 0.0014030275124241598,
      "clip_ratio/high_mean": 0.0004283967252831644,
      "clip_ratio/low_mean": 0.00028666629077633843,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007150630153773818,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3158.0,
      "completions/mean_length": 604.9553833007812,
      "completions/mean_terminated_length": 553.5582885742188,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 1.9332166812481772,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 120263157.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.2050851285457611,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652786254883,
      "step": 207
    },
    {
      "clip_ratio/high_max": 0.0016974749250948662,
      "clip_ratio/high_mean": 0.0004747115065129037,
      "clip_ratio/low_mean": 0.00037400690644062706,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008487184168188833,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2375.0,
      "completions/mean_length": 631.3449096679688,
      "completions/mean_terminated_length": 596.1904907226562,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 1.942548848060659,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": -0.0036,
      "num_tokens": 120882970.0,
      "reward": 0.5055803656578064,
      "reward_std": 0.22537913918495178,
      "rewards/verify_math_reward/mean": 0.5055803656578064,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 208
    },
    {
      "clip_ratio/high_max": 0.001820983075958793,
      "clip_ratio/high_mean": 0.0006102061242927448,
      "clip_ratio/low_mean": 0.0003176068756829409,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009278130028178566,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3603.0,
      "completions/mean_length": 559.8683471679688,
      "completions/mean_terminated_length": 499.6617736816406,
      "completions/min_length": 28.0,
      "completions/min_terminated_length": 28.0,
      "epoch": 1.9518810148731407,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0089,
      "num_tokens": 121408396.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.2248920202255249,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 209
    },
    {
      "clip_ratio/high_max": 0.0019035836230614223,
      "clip_ratio/high_mean": 0.0006059127754269866,
      "clip_ratio/low_mean": 0.00039355266926577315,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000999465443783265,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3370.0,
      "completions/mean_length": 599.7980346679688,
      "completions/mean_terminated_length": 536.2306518554688,
      "completions/min_length": 90.0,
      "completions/min_terminated_length": 90.0,
      "epoch": 1.9612131816856226,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": 0.0003,
      "num_tokens": 121962903.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.24487264454364777,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 210
    },
    {
      "clip_ratio/high_max": 0.0016127133912959835,
      "clip_ratio/high_mean": 0.000553788932847965,
      "clip_ratio/low_mean": 0.0004200730081720394,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009738619387462677,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4002.0,
      "completions/mean_length": 596.724365234375,
      "completions/mean_terminated_length": 561.2186889648438,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 1.9705453484981044,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.0213,
      "num_tokens": 122550624.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.250287801027298,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 211
    },
    {
      "clip_ratio/high_max": 0.0015774362473166548,
      "clip_ratio/high_mean": 0.000461443678887008,
      "clip_ratio/low_mean": 0.00034105664599337615,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008025003280636156,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3063.0,
      "completions/mean_length": 582.279052734375,
      "completions/mean_terminated_length": 522.4540405273438,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 1.9798775153105863,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0105,
      "num_tokens": 123095442.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.20339404046535492,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321892857551575,
      "step": 212
    },
    {
      "clip_ratio/high_max": 0.0013675859308932559,
      "clip_ratio/high_mean": 0.00044480577344074845,
      "clip_ratio/low_mean": 0.000273161410063949,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007179671843005053,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2225.0,
      "completions/mean_length": 564.7801513671875,
      "completions/mean_terminated_length": 516.8450317382812,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 1.989209682123068,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.008,
      "num_tokens": 123645085.0,
      "reward": 0.5859375,
      "reward_std": 0.20087602734565735,
      "rewards/verify_math_reward/mean": 0.5859375,
      "rewards/verify_math_reward/std": 0.4928344786167145,
      "step": 213
    },
    {
      "clip_ratio/high_max": 0.0015998765984477359,
      "clip_ratio/high_mean": 0.0005085137538571871,
      "clip_ratio/low_mean": 0.000265932229694954,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007744459771856782,
      "completions/clipped_ratio": 0.005681818181818232,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3360.0,
      "completions/mean_length": 588.5369262695312,
      "completions/mean_terminated_length": 568.4942626953125,
      "completions/min_length": 65.0,
      "completions/min_terminated_length": 65.0,
      "epoch": 1.9985418489355498,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": 0.0152,
      "num_tokens": 124229877.0,
      "reward": 0.6171875,
      "reward_std": 0.20387978851795197,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 214
    },
    {
      "clip_ratio/high_max": 0.0015850091485845041,
      "clip_ratio/high_mean": 0.0005638165930577088,
      "clip_ratio/low_mean": 0.0002923993365584465,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008562159546272596,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3623.0,
      "completions/mean_length": 625.107177734375,
      "completions/mean_terminated_length": 553.9498901367188,
      "completions/min_length": 85.0,
      "completions/min_terminated_length": 85.0,
      "epoch": 2.0093321668124817,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0124,
      "num_tokens": 124804733.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.23560000956058502,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689778685569763,
      "step": 215
    },
    {
      "clip_ratio/high_max": 0.0014533565063175047,
      "clip_ratio/high_mean": 0.0004272621478094152,
      "clip_ratio/low_mean": 0.00027526743645012175,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007025295690255007,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4010.0,
      "completions/mean_length": 679.6015625,
      "completions/mean_terminated_length": 577.5023193359375,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 2.0186643336249634,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0028,
      "num_tokens": 125396840.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.1908424347639084,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 216
    },
    {
      "clip_ratio/high_max": 0.0014981682179495692,
      "clip_ratio/high_mean": 0.0004227994944585589,
      "clip_ratio/low_mean": 0.0003687052671921265,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007915047617643722,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3070.0,
      "completions/mean_length": 609.4631958007812,
      "completions/mean_terminated_length": 554.121337890625,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 2.027996500437445,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0112,
      "num_tokens": 125979271.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.2080136239528656,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 217
    },
    {
      "clip_ratio/high_max": 0.0016089115633803885,
      "clip_ratio/high_mean": 0.0004406281209412555,
      "clip_ratio/low_mean": 0.00037061431135043676,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008112424502542126,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4039.0,
      "completions/mean_length": 646.9631958007812,
      "completions/mean_terminated_length": 608.0349731445312,
      "completions/min_length": 43.0,
      "completions/min_terminated_length": 43.0,
      "epoch": 2.037328667249927,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": 0.0125,
      "num_tokens": 126609302.0,
      "reward": 0.504464328289032,
      "reward_std": 0.23469960689544678,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5002593398094177,
      "step": 218
    },
    {
      "clip_ratio/high_max": 0.0019641154885903234,
      "clip_ratio/high_mean": 0.0006396210555976722,
      "clip_ratio/low_mean": 0.0003920840014188798,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010317050582671072,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2194.0,
      "completions/mean_length": 527.4989013671875,
      "completions/mean_terminated_length": 495.3502502441406,
      "completions/min_length": 87.0,
      "completions/min_terminated_length": 87.0,
      "epoch": 2.046660834062409,
      "grad_norm": 0.1591796875,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 127136517.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.25434422492980957,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 219
    },
    {
      "clip_ratio/high_max": 0.0013395062705967575,
      "clip_ratio/high_mean": 0.00038412570756918285,
      "clip_ratio/low_mean": 0.0004287898282200331,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000812915544884163,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3811.0,
      "completions/mean_length": 628.989990234375,
      "completions/mean_terminated_length": 589.8589477539062,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "epoch": 2.055993000874891,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0021,
      "num_tokens": 127753668.0,
      "reward": 0.470982164144516,
      "reward_std": 0.2315768003463745,
      "rewards/verify_math_reward/mean": 0.4709821343421936,
      "rewards/verify_math_reward/std": 0.49943602085113525,
      "step": 220
    },
    {
      "clip_ratio/high_max": 0.0015597017663822044,
      "clip_ratio/high_mean": 0.0004319779341130925,
      "clip_ratio/low_mean": 0.00028511065738712205,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007170885951381933,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3443.0,
      "completions/mean_length": 584.3147583007812,
      "completions/mean_terminated_length": 548.6831665039062,
      "completions/min_length": 58.0,
      "completions/min_terminated_length": 58.0,
      "epoch": 2.0653251676873725,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0016,
      "num_tokens": 128328262.0,
      "reward": 0.5546875,
      "reward_std": 0.21079309284687042,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 221
    },
    {
      "clip_ratio/high_max": 0.0016742968946346082,
      "clip_ratio/high_mean": 0.000601753146156625,
      "clip_ratio/low_mean": 0.0003211780654055474,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009229312072420726,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3281.0,
      "completions/mean_length": 632.9631958007812,
      "completions/mean_terminated_length": 577.9943237304688,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 2.0746573344998542,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0124,
      "num_tokens": 128929701.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.22469627857208252,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 222
    },
    {
      "clip_ratio/high_max": 0.0017082222111639567,
      "clip_ratio/high_mean": 0.0005120211583289347,
      "clip_ratio/low_mean": 0.0002879522712646576,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007999734343684395,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3516.0,
      "completions/mean_length": 577.9163208007812,
      "completions/mean_terminated_length": 530.1595458984375,
      "completions/min_length": 29.0,
      "completions/min_terminated_length": 29.0,
      "epoch": 2.083989501312336,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": 0.0066,
      "num_tokens": 129499482.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.19723255932331085,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791125416755676,
      "step": 223
    },
    {
      "clip_ratio/high_max": 0.0014899580464771134,
      "clip_ratio/high_mean": 0.0004448560117680245,
      "clip_ratio/low_mean": 0.00024159531301393145,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006864513279651874,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2740.0,
      "completions/mean_length": 623.2277221679688,
      "completions/mean_terminated_length": 560.0863647460938,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 2.0933216681248177,
      "grad_norm": 0.11962890625,
      "learning_rate": 1e-06,
      "loss": 0.0014,
      "num_tokens": 130078062.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.2121456265449524,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 224
    },
    {
      "clip_ratio/high_max": 0.0013468493916661828,
      "clip_ratio/high_mean": 0.00040207266920333495,
      "clip_ratio/low_mean": 0.00037441720064634865,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007764898682580679,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2562.0,
      "completions/mean_length": 623.4342041015625,
      "completions/mean_terminated_length": 584.2404174804688,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 2.1026538349372994,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.007,
      "num_tokens": 130694107.0,
      "reward": 0.4642857313156128,
      "reward_std": 0.23006653785705566,
      "rewards/verify_math_reward/mean": 0.4642857015132904,
      "rewards/verify_math_reward/std": 0.4990013837814331,
      "step": 225
    },
    {
      "clip_ratio/high_max": 0.0015661860970794805,
      "clip_ratio/high_mean": 0.0005118552935527987,
      "clip_ratio/low_mean": 0.0003541565188243112,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008660118146508466,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3569.0,
      "completions/mean_length": 640.6038208007812,
      "completions/mean_terminated_length": 601.6038818359375,
      "completions/min_length": 73.0,
      "completions/min_terminated_length": 73.0,
      "epoch": 2.111986001749781,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0019,
      "num_tokens": 131318976.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.2358168661594391,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 226
    },
    {
      "clip_ratio/high_max": 0.001513285938926856,
      "clip_ratio/high_mean": 0.00047211629862431437,
      "clip_ratio/low_mean": 0.0002881199313833349,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007602362429679488,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3505.0,
      "completions/mean_length": 618.193115234375,
      "completions/mean_terminated_length": 554.960205078125,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 2.121318168562263,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 131890093.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.2159428745508194,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 227
    },
    {
      "clip_ratio/high_max": 0.0015444342507180409,
      "clip_ratio/high_mean": 0.0004563076370232011,
      "clip_ratio/low_mean": 0.0003338523467846244,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000790159988355299,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3884.0,
      "completions/mean_length": 602.9866333007812,
      "completions/mean_terminated_length": 563.5620727539062,
      "completions/min_length": 39.0,
      "completions/min_terminated_length": 39.0,
      "epoch": 2.130650335374745,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0007,
      "num_tokens": 132477817.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.21147526800632477,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 228
    },
    {
      "clip_ratio/high_max": 0.0015167887777352007,
      "clip_ratio/high_mean": 0.00043480312615429284,
      "clip_ratio/low_mean": 0.0002541928790833481,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000688996006601883,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3922.0,
      "completions/mean_length": 672.4877319335938,
      "completions/mean_terminated_length": 614.1986694335938,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 2.1399825021872267,
      "grad_norm": 0.1142578125,
      "learning_rate": 1e-06,
      "loss": -0.0068,
      "num_tokens": 133113862.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.187350794672966,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 229
    },
    {
      "clip_ratio/high_max": 0.0012672489256146946,
      "clip_ratio/high_mean": 0.0003684084707629154,
      "clip_ratio/low_mean": 0.0003440370865064324,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007124455569282873,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3863.0,
      "completions/mean_length": 586.4453125,
      "completions/mean_terminated_length": 534.7757568359375,
      "completions/min_length": 47.0,
      "completions/min_terminated_length": 47.0,
      "epoch": 2.1493146689997085,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": 0.0011,
      "num_tokens": 133671661.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.20316441357135773,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 230
    },
    {
      "clip_ratio/high_max": 0.0013330569227036904,
      "clip_ratio/high_mean": 0.00041278249295828573,
      "clip_ratio/low_mean": 0.00036238569771285256,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007751681851004832,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2981.0,
      "completions/mean_length": 569.5982666015625,
      "completions/mean_terminated_length": 545.82470703125,
      "completions/min_length": 87.0,
      "completions/min_terminated_length": 87.0,
      "epoch": 2.15864683581219,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0034,
      "num_tokens": 134247181.0,
      "reward": 0.535714328289032,
      "reward_std": 0.2050095647573471,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 231
    },
    {
      "clip_ratio/high_max": 0.001488387457357021,
      "clip_ratio/high_mean": 0.00040857345675249235,
      "clip_ratio/low_mean": 0.0003010989770473316,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007096724366419949,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3861.0,
      "completions/mean_length": 704.6707763671875,
      "completions/mean_terminated_length": 611.3314208984375,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 2.167979002624672,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.0136,
      "num_tokens": 134870638.0,
      "reward": 0.5,
      "reward_std": 0.20538821816444397,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5002792477607727,
      "step": 232
    },
    {
      "clip_ratio/high_max": 0.0013531004747164843,
      "clip_ratio/high_mean": 0.0004159661771154788,
      "clip_ratio/low_mean": 0.00030782140515839274,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007237875747705402,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3693.0,
      "completions/mean_length": 649.9085083007812,
      "completions/mean_terminated_length": 607.0757446289062,
      "completions/min_length": 38.0,
      "completions/min_terminated_length": 38.0,
      "epoch": 2.1773111694371536,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": -0.0005,
      "num_tokens": 135499164.0,
      "reward": 0.5256696939468384,
      "reward_std": 0.20936980843544006,
      "rewards/verify_math_reward/mean": 0.5256696343421936,
      "rewards/verify_math_reward/std": 0.4996195137500763,
      "step": 233
    },
    {
      "clip_ratio/high_max": 0.0016234310169238597,
      "clip_ratio/high_mean": 0.0005124375156810856,
      "clip_ratio/low_mean": 0.0003589206861533967,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008713581937627168,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3995.0,
      "completions/mean_length": 589.177490234375,
      "completions/mean_terminated_length": 541.5735473632812,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 2.1866433362496354,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0045,
      "num_tokens": 136085179.0,
      "reward": 0.4810267984867096,
      "reward_std": 0.2249252200126648,
      "rewards/verify_math_reward/mean": 0.4810267984867096,
      "rewards/verify_math_reward/std": 0.49991899728775024,
      "step": 234
    },
    {
      "clip_ratio/high_max": 0.001494391443884524,
      "clip_ratio/high_mean": 0.0004722534943084611,
      "clip_ratio/low_mean": 0.0003999232769729133,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008721767658244062,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4083.0,
      "completions/mean_length": 568.8326416015625,
      "completions/mean_terminated_length": 533.0439453125,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 2.195975503062117,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0012,
      "num_tokens": 136645285.0,
      "reward": 0.574776828289032,
      "reward_std": 0.22564129531383514,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 235
    },
    {
      "clip_ratio/high_max": 0.001773574347680551,
      "clip_ratio/high_mean": 0.0005579647352078609,
      "clip_ratio/low_mean": 0.0003477517964256549,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009057165216290741,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2375.0,
      "completions/mean_length": 592.9721069335938,
      "completions/mean_terminated_length": 537.3684692382812,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 2.205307669874599,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0062,
      "num_tokens": 137208676.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.22774562239646912,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 236
    },
    {
      "clip_ratio/high_max": 0.0015008118916739477,
      "clip_ratio/high_mean": 0.0004158200515576027,
      "clip_ratio/low_mean": 0.0003367571597436836,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007525772120970942,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2849.0,
      "completions/mean_length": 594.4631958007812,
      "completions/mean_terminated_length": 550.9412841796875,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 2.214639836687081,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0075,
      "num_tokens": 137779667.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.2336026430130005,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.4999600946903229,
      "step": 237
    },
    {
      "clip_ratio/high_max": 0.0015419518131238874,
      "clip_ratio/high_mean": 0.0004725132537259924,
      "clip_ratio/low_mean": 0.0002834307499597344,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007559440082332003,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3808.0,
      "completions/mean_length": 602.0223388671875,
      "completions/mean_terminated_length": 558.5943603515625,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 2.2239720034995627,
      "grad_norm": 0.115234375,
      "learning_rate": 1e-06,
      "loss": -0.0181,
      "num_tokens": 138355759.0,
      "reward": 0.5379464626312256,
      "reward_std": 0.19572414457798004,
      "rewards/verify_math_reward/mean": 0.5379464030265808,
      "rewards/verify_math_reward/std": 0.4988364577293396,
      "step": 238
    },
    {
      "clip_ratio/high_max": 0.0018368209566688165,
      "clip_ratio/high_mean": 0.0006160234323715486,
      "clip_ratio/low_mean": 0.0002864181915356312,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009024416303873295,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3801.0,
      "completions/mean_length": 600.765625,
      "completions/mean_terminated_length": 529.109375,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 2.2333041703120444,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0158,
      "num_tokens": 138911509.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.22559921443462372,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 239
    },
    {
      "clip_ratio/high_max": 0.0012299857717152918,
      "clip_ratio/high_mean": 0.00033535454531374853,
      "clip_ratio/low_mean": 0.00033555841582710855,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006709129556838889,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3514.0,
      "completions/mean_length": 633.8582763671875,
      "completions/mean_terminated_length": 554.8139038085938,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 2.242636337124526,
      "grad_norm": 0.11572265625,
      "learning_rate": 1e-06,
      "loss": 0.0057,
      "num_tokens": 139472670.0,
      "reward": 0.606026828289032,
      "reward_std": 0.19080783426761627,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 240
    },
    {
      "clip_ratio/high_max": 0.0015930078670862713,
      "clip_ratio/high_mean": 0.0005159634893061593,
      "clip_ratio/low_mean": 0.00026747419946104856,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007834376783648622,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2474.0,
      "completions/mean_length": 593.732177734375,
      "completions/mean_terminated_length": 550.201171875,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 2.251968503937008,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0022,
      "num_tokens": 140048078.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.2112131267786026,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 241
    },
    {
      "clip_ratio/high_max": 0.0018969822485814802,
      "clip_ratio/high_mean": 0.0005742547214140359,
      "clip_ratio/low_mean": 0.0002685978399767919,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008428525607087067,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2051.0,
      "completions/mean_length": 595.7935791015625,
      "completions/mean_terminated_length": 552.2881469726562,
      "completions/min_length": 47.0,
      "completions/min_terminated_length": 47.0,
      "epoch": 2.2613006707494896,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": -0.0078,
      "num_tokens": 140616821.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.23067595064640045,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 242
    },
    {
      "clip_ratio/high_max": 0.0016339275844075019,
      "clip_ratio/high_mean": 0.0005235973521848791,
      "clip_ratio/low_mean": 0.0003519884385241312,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008755857907090103,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2441.0,
      "completions/mean_length": 586.5245971679688,
      "completions/mean_terminated_length": 526.7718505859375,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 2.2706328375619713,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": -0.0091,
      "num_tokens": 141166563.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.22890609502792358,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 243
    },
    {
      "clip_ratio/high_max": 0.0019044924256377271,
      "clip_ratio/high_mean": 0.0005128569018779672,
      "clip_ratio/low_mean": 0.00028219371199611487,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007950506101224164,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3243.0,
      "completions/mean_length": 552.2421875,
      "completions/mean_terminated_length": 491.90582275390625,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 2.279965004374453,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": -0.0058,
      "num_tokens": 141676996.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.1829923838376999,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 244
    },
    {
      "clip_ratio/high_max": 0.001306582753386465,
      "clip_ratio/high_mean": 0.0003661511698282993,
      "clip_ratio/low_mean": 0.0003034111775832571,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006695623519590299,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4086.0,
      "completions/mean_length": 645.771240234375,
      "completions/mean_terminated_length": 566.9988403320312,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 2.289297171186935,
      "grad_norm": 0.11669921875,
      "learning_rate": 1e-06,
      "loss": -0.0224,
      "num_tokens": 142256943.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.1900942623615265,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 245
    },
    {
      "clip_ratio/high_max": 0.0014559138498952962,
      "clip_ratio/high_mean": 0.0004845738667427213,
      "clip_ratio/low_mean": 0.0004119984905628371,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008965723764049471,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3074.0,
      "completions/mean_length": 608.5402221679688,
      "completions/mean_terminated_length": 573.1544189453125,
      "completions/min_length": 30.0,
      "completions/min_terminated_length": 30.0,
      "epoch": 2.298629337999417,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.002,
      "num_tokens": 142858323.0,
      "reward": 0.4743303656578064,
      "reward_std": 0.2416456937789917,
      "rewards/verify_math_reward/mean": 0.4743303656578064,
      "rewards/verify_math_reward/std": 0.4996195435523987,
      "step": 246
    },
    {
      "clip_ratio/high_max": 0.0017494164385425393,
      "clip_ratio/high_mean": 0.0005041158133280987,
      "clip_ratio/low_mean": 0.0003492841861998386,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008534000066902081,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2547.0,
      "completions/mean_length": 605.7689819335938,
      "completions/mean_terminated_length": 558.3903198242188,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 2.3079615048118987,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0111,
      "num_tokens": 143447628.0,
      "reward": 0.5055803656578064,
      "reward_std": 0.20888377726078033,
      "rewards/verify_math_reward/mean": 0.5055803656578064,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 247
    },
    {
      "clip_ratio/high_max": 0.00194004879995191,
      "clip_ratio/high_mean": 0.0005720839756122587,
      "clip_ratio/low_mean": 0.00037973327562212944,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000951817240093078,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3652.0,
      "completions/mean_length": 591.685302734375,
      "completions/mean_terminated_length": 532.0204467773438,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "epoch": 2.3172936716243804,
      "grad_norm": 0.1494140625,
      "learning_rate": 1e-06,
      "loss": 0.0042,
      "num_tokens": 144007338.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.24750901758670807,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 248
    },
    {
      "clip_ratio/high_max": 0.0018254909682582365,
      "clip_ratio/high_mean": 0.0005285317274683621,
      "clip_ratio/low_mean": 0.00025506711529033055,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007835988594706578,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3725.0,
      "completions/mean_length": 648.9799194335938,
      "completions/mean_terminated_length": 578.3120727539062,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 2.326625838436862,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0083,
      "num_tokens": 144605752.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.21958816051483154,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 249
    },
    {
      "clip_ratio/high_max": 0.0017700926309771603,
      "clip_ratio/high_mean": 0.0006402511294254509,
      "clip_ratio/low_mean": 0.0003122798659660475,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009525309833406936,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3511.0,
      "completions/mean_length": 592.958740234375,
      "completions/mean_terminated_length": 537.3548583984375,
      "completions/min_length": 60.0,
      "completions/min_terminated_length": 60.0,
      "epoch": 2.335958005249344,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0033,
      "num_tokens": 145162763.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.24664321541786194,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 250
    },
    {
      "clip_ratio/high_max": 0.0014909809515302186,
      "clip_ratio/high_mean": 0.0004996945815491927,
      "clip_ratio/low_mean": 0.0003681553966998763,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008678499707457377,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2830.0,
      "completions/mean_length": 558.4620971679688,
      "completions/mean_terminated_length": 522.5681762695312,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 2.3452901720618256,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": -0.0081,
      "num_tokens": 145718121.0,
      "reward": 0.520089328289032,
      "reward_std": 0.22703349590301514,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 251
    },
    {
      "clip_ratio/high_max": 0.0018064291252812836,
      "clip_ratio/high_mean": 0.0005305278225478105,
      "clip_ratio/low_mean": 0.00040231651246358524,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009328443215963489,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2810.0,
      "completions/mean_length": 547.921875,
      "completions/mean_terminated_length": 511.9210510253906,
      "completions/min_length": 76.0,
      "completions/min_terminated_length": 76.0,
      "epoch": 2.3546223388743073,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": 0.0159,
      "num_tokens": 146257523.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.22950340807437897,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 252
    },
    {
      "clip_ratio/high_max": 0.0017361845375489793,
      "clip_ratio/high_mean": 0.0005629460900991035,
      "clip_ratio/low_mean": 0.00030345223558470025,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008663983217047644,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3486.0,
      "completions/mean_length": 627.9699096679688,
      "completions/mean_terminated_length": 556.871337890625,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "epoch": 2.363954505686789,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0082,
      "num_tokens": 146831592.0,
      "reward": 0.582589328289032,
      "reward_std": 0.20125621557235718,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.4934072494506836,
      "step": 253
    },
    {
      "clip_ratio/high_max": 0.0014099233130764333,
      "clip_ratio/high_mean": 0.000422317779793957,
      "clip_ratio/low_mean": 0.00035918456478611915,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007815023473085603,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3229.0,
      "completions/mean_length": 655.5703125,
      "completions/mean_terminated_length": 596.9932250976562,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 2.3732866724992707,
      "grad_norm": 0.11669921875,
      "learning_rate": 1e-06,
      "loss": -0.0006,
      "num_tokens": 147439999.0,
      "reward": 0.4799107313156128,
      "reward_std": 0.22184516489505768,
      "rewards/verify_math_reward/mean": 0.4799107015132904,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 254
    },
    {
      "clip_ratio/high_max": 0.00166749171421543,
      "clip_ratio/high_mean": 0.0005249900411854469,
      "clip_ratio/low_mean": 0.000330110839399822,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008551008886570344,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2655.0,
      "completions/mean_length": 592.224365234375,
      "completions/mean_terminated_length": 532.5687255859375,
      "completions/min_length": 25.0,
      "completions/min_terminated_length": 25.0,
      "epoch": 2.382618839311753,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0055,
      "num_tokens": 147994224.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.20287340879440308,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.4884119927883148,
      "step": 255
    },
    {
      "clip_ratio/high_max": 0.0015114786774574895,
      "clip_ratio/high_mean": 0.0004187021561392612,
      "clip_ratio/low_mean": 0.0003389281773706898,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007576303396490403,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4004.0,
      "completions/mean_length": 636.8147583007812,
      "completions/mean_terminated_length": 593.8192138671875,
      "completions/min_length": 27.0,
      "completions/min_terminated_length": 27.0,
      "epoch": 2.3919510061242346,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.0091,
      "num_tokens": 148613450.0,
      "reward": 0.520089328289032,
      "reward_std": 0.216995969414711,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 256
    },
    {
      "clip_ratio/high_max": 0.0016861584958860476,
      "clip_ratio/high_mean": 0.000553906721279418,
      "clip_ratio/low_mean": 0.00030574112531667197,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008596478355684667,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3557.0,
      "completions/mean_length": 609.6261596679688,
      "completions/mean_terminated_length": 570.2765502929688,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 2.4012831729367163,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0059,
      "num_tokens": 149207227.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.22966887056827545,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689778685569763,
      "step": 257
    },
    {
      "clip_ratio/high_max": 0.0018101280911650974,
      "clip_ratio/high_mean": 0.000577292491470871,
      "clip_ratio/low_mean": 0.0003695304706070601,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009468229518461158,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2347.0,
      "completions/mean_length": 548.0592041015625,
      "completions/mean_terminated_length": 524.1404418945312,
      "completions/min_length": 69.0,
      "completions/min_terminated_length": 69.0,
      "epoch": 2.410615339749198,
      "grad_norm": 0.1494140625,
      "learning_rate": 1e-06,
      "loss": 0.0051,
      "num_tokens": 149758616.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.25389915704727173,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 258
    },
    {
      "clip_ratio/high_max": 0.00144162098695233,
      "clip_ratio/high_mean": 0.00045632900082637207,
      "clip_ratio/low_mean": 0.0002676564937473813,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007239855031002662,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4021.0,
      "completions/mean_length": 563.9475708007812,
      "completions/mean_terminated_length": 536.1361083984375,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 2.41994750656168,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0117,
      "num_tokens": 150326777.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.19208095967769623,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 259
    },
    {
      "clip_ratio/high_max": 0.001589305682500708,
      "clip_ratio/high_mean": 0.0004874247865700454,
      "clip_ratio/low_mean": 0.0003013444198813886,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007887692086114839,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2764.0,
      "completions/mean_length": 617.9017944335938,
      "completions/mean_terminated_length": 554.6636352539062,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 2.4292796733741615,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": -0.0193,
      "num_tokens": 150906513.0,
      "reward": 0.4988839626312256,
      "reward_std": 0.22274348139762878,
      "rewards/verify_math_reward/mean": 0.4988839328289032,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 260
    },
    {
      "clip_ratio/high_max": 0.0017504588249721564,
      "clip_ratio/high_mean": 0.0004910838752039126,
      "clip_ratio/low_mean": 0.0002576393908384489,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007487232687708456,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4012.0,
      "completions/mean_length": 596.943115234375,
      "completions/mean_terminated_length": 557.4503784179688,
      "completions/min_length": 94.0,
      "completions/min_terminated_length": 94.0,
      "epoch": 2.4386118401866432,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0071,
      "num_tokens": 151495694.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.20238415896892548,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 261
    },
    {
      "clip_ratio/high_max": 0.0016746958863222972,
      "clip_ratio/high_mean": 0.0004377821796879289,
      "clip_ratio/low_mean": 0.00027589590604293335,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007136780805012677,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3955.0,
      "completions/mean_length": 592.3359375,
      "completions/mean_terminated_length": 548.78759765625,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 2.447944006999125,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": -0.0009,
      "num_tokens": 152070211.0,
      "reward": 0.520089328289032,
      "reward_std": 0.20230887830257416,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 262
    },
    {
      "clip_ratio/high_max": 0.001553309908558731,
      "clip_ratio/high_mean": 0.0005235942248873471,
      "clip_ratio/low_mean": 0.0003422477983576755,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000865842013809015,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3619.0,
      "completions/mean_length": 582.9564819335938,
      "completions/mean_terminated_length": 531.2355346679688,
      "completions/min_length": 78.0,
      "completions/min_terminated_length": 78.0,
      "epoch": 2.457276173811607,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0148,
      "num_tokens": 152624100.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.20415011048316956,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 263
    },
    {
      "clip_ratio/high_max": 0.0015362589047072106,
      "clip_ratio/high_mean": 0.0004781124704322792,
      "clip_ratio/low_mean": 0.0003200235181566313,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007981359863151738,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3408.0,
      "completions/mean_length": 593.8839721679688,
      "completions/mean_terminated_length": 546.3439331054688,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 2.466608340624089,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": -0.0007,
      "num_tokens": 153201300.0,
      "reward": 0.5379464626312256,
      "reward_std": 0.2363481968641281,
      "rewards/verify_math_reward/mean": 0.5379464030265808,
      "rewards/verify_math_reward/std": 0.4988364577293396,
      "step": 264
    },
    {
      "clip_ratio/high_max": 0.0016864011940924684,
      "clip_ratio/high_mean": 0.0005238697140157456,
      "clip_ratio/low_mean": 0.00035080927580111165,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008746790044824593,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4062.0,
      "completions/mean_length": 624.6004638671875,
      "completions/mean_terminated_length": 565.4960327148438,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "epoch": 2.4759405074365706,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0091,
      "num_tokens": 153788246.0,
      "reward": 0.546875,
      "reward_std": 0.20685499906539917,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 265
    },
    {
      "clip_ratio/high_max": 0.0018087032476614695,
      "clip_ratio/high_mean": 0.0006073099975765217,
      "clip_ratio/low_mean": 0.00028049693025877787,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008878069238562603,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2868.0,
      "completions/mean_length": 583.0424194335938,
      "completions/mean_terminated_length": 539.3785400390625,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 2.4852726742490523,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0251,
      "num_tokens": 154348940.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.23770253360271454,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 266
    },
    {
      "clip_ratio/high_max": 0.0017296092755714199,
      "clip_ratio/high_mean": 0.0006053657261873013,
      "clip_ratio/low_mean": 0.0003528901040681376,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009582558423062437,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3858.0,
      "completions/mean_length": 619.489990234375,
      "completions/mean_terminated_length": 568.306884765625,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 2.494604841061534,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0018,
      "num_tokens": 154936963.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.2424006164073944,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763562679291,
      "step": 267
    },
    {
      "clip_ratio/high_max": 0.0015104042849998223,
      "clip_ratio/high_mean": 0.0004024213969842094,
      "clip_ratio/low_mean": 0.00026178848742119953,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006642098906013416,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4018.0,
      "completions/mean_length": 668.2701416015625,
      "completions/mean_terminated_length": 601.9772338867188,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 2.5039370078740157,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": -0.0003,
      "num_tokens": 155565077.0,
      "reward": 0.5145089626312256,
      "reward_std": 0.19558146595954895,
      "rewards/verify_math_reward/mean": 0.5145089030265808,
      "rewards/verify_math_reward/std": 0.5000685453414917,
      "step": 268
    },
    {
      "clip_ratio/high_max": 0.0015643926399206975,
      "clip_ratio/high_mean": 0.0005585485578194493,
      "clip_ratio/low_mean": 0.0002530500935336022,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000811598647487699,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3530.0,
      "completions/mean_length": 594.646240234375,
      "completions/mean_terminated_length": 551.1265869140625,
      "completions/min_length": 77.0,
      "completions/min_terminated_length": 77.0,
      "epoch": 2.5132691746864975,
      "grad_norm": 0.1435546875,
      "learning_rate": 1e-06,
      "loss": 0.0055,
      "num_tokens": 156137664.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.22582674026489258,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924395978450775,
      "step": 269
    },
    {
      "clip_ratio/high_max": 0.0015179792881099274,
      "clip_ratio/high_mean": 0.0004894791999277004,
      "clip_ratio/low_mean": 0.0003458185989302365,
      "clip_ratio/low_min": 1.2322555448918138e-05,
      "clip_ratio/region_mean": 0.0008352977865797584,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3682.0,
      "completions/mean_length": 581.9085083007812,
      "completions/mean_terminated_length": 546.2525024414062,
      "completions/min_length": 76.0,
      "completions/min_terminated_length": 76.0,
      "epoch": 2.522601341498979,
      "grad_norm": 0.1455078125,
      "learning_rate": 1e-06,
      "loss": -0.0098,
      "num_tokens": 156715966.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.22838753461837769,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 270
    },
    {
      "clip_ratio/high_max": 0.001546673691336764,
      "clip_ratio/high_mean": 0.0004874001408552431,
      "clip_ratio/low_mean": 0.0003322237616885104,
      "clip_ratio/low_min": 9.648039849707857e-06,
      "clip_ratio/region_mean": 0.0008196238959499169,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3253.0,
      "completions/mean_length": 640.7533569335938,
      "completions/mean_terminated_length": 605.6944580078125,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 2.531933508311461,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": -0.0059,
      "num_tokens": 157364217.0,
      "reward": 0.4966517984867096,
      "reward_std": 0.21767067909240723,
      "rewards/verify_math_reward/mean": 0.4966517984867096,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 271
    },
    {
      "clip_ratio/high_max": 0.0017599499005882535,
      "clip_ratio/high_mean": 0.0005300088896547095,
      "clip_ratio/low_mean": 0.00037613722565765784,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009061461050805519,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3020.0,
      "completions/mean_length": 600.2355346679688,
      "completions/mean_terminated_length": 548.7689819335938,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 2.5412656751239426,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0015,
      "num_tokens": 157943868.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.2154243439435959,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689778685569763,
      "step": 272
    },
    {
      "clip_ratio/high_max": 0.0017446381225454388,
      "clip_ratio/high_mean": 0.0005404869284575398,
      "clip_ratio/low_mean": 0.00038403678422582743,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009245237024515518,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2131.0,
      "completions/mean_length": 592.7109375,
      "completions/mean_terminated_length": 557.16455078125,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 2.5505978419364244,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0117,
      "num_tokens": 158520209.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.25667428970336914,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 273
    },
    {
      "clip_ratio/high_max": 0.0017314323304162826,
      "clip_ratio/high_mean": 0.000502947056247649,
      "clip_ratio/low_mean": 0.0003295998160410818,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008325468934344826,
      "completions/clipped_ratio": 0.0323660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3852.0,
      "completions/mean_length": 747.3739013671875,
      "completions/mean_terminated_length": 635.3667602539062,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 2.5599300087489065,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0062,
      "num_tokens": 159155880.0,
      "reward": 0.4910714626312256,
      "reward_std": 0.22075042128562927,
      "rewards/verify_math_reward/mean": 0.4910714328289032,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 274
    },
    {
      "clip_ratio/high_max": 0.0017812741334637394,
      "clip_ratio/high_mean": 0.0005633969503833214,
      "clip_ratio/low_mean": 0.0003263460660036799,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008897430261640693,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2630.0,
      "completions/mean_length": 623.4408569335938,
      "completions/mean_terminated_length": 572.3159790039062,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 2.5692621755613883,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0221,
      "num_tokens": 159748315.0,
      "reward": 0.527901828289032,
      "reward_std": 0.24987341463565826,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 275
    },
    {
      "clip_ratio/high_max": 0.0015368411059171194,
      "clip_ratio/high_mean": 0.0005303337151190135,
      "clip_ratio/low_mean": 0.00035316439902999264,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008834981335894554,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2727.0,
      "completions/mean_length": 619.3694458007812,
      "completions/mean_terminated_length": 564.184814453125,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 2.57859434237387,
      "grad_norm": 0.1435546875,
      "learning_rate": 1e-06,
      "loss": 0.0162,
      "num_tokens": 160343198.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.23514537513256073,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 276
    },
    {
      "clip_ratio/high_max": 0.0014041623089724453,
      "clip_ratio/high_mean": 0.0003957954627367144,
      "clip_ratio/low_mean": 0.00030164567294832523,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006974411371629685,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2043.0,
      "completions/mean_length": 586.8873291015625,
      "completions/mean_terminated_length": 531.1870727539062,
      "completions/min_length": 29.0,
      "completions/min_terminated_length": 29.0,
      "epoch": 2.5879265091863517,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0043,
      "num_tokens": 160898641.0,
      "reward": 0.5546875,
      "reward_std": 0.19204705953598022,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 277
    },
    {
      "clip_ratio/high_max": 0.0016594494354649214,
      "clip_ratio/high_mean": 0.0005165247798686323,
      "clip_ratio/low_mean": 0.0003543018701748224,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008708266668691067,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3373.0,
      "completions/mean_length": 611.4765625,
      "completions/mean_terminated_length": 580.08447265625,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 2.5972586759988334,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0113,
      "num_tokens": 161506348.0,
      "reward": 0.4910714626312256,
      "reward_std": 0.23747360706329346,
      "rewards/verify_math_reward/mean": 0.4910714328289032,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 278
    },
    {
      "clip_ratio/high_max": 0.0018392898800811963,
      "clip_ratio/high_mean": 0.0004801133568435034,
      "clip_ratio/low_mean": 0.00031793066432328487,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007980440245773934,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3392.0,
      "completions/mean_length": 605.0592041015625,
      "completions/mean_terminated_length": 533.4909057617188,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 2.606590842811315,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0002,
      "num_tokens": 162061777.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.21335633099079132,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 279
    },
    {
      "clip_ratio/high_max": 0.00174964958387136,
      "clip_ratio/high_mean": 0.00047927262903613155,
      "clip_ratio/low_mean": 0.00037942975109217514,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000858702378536691,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3995.0,
      "completions/mean_length": 587.880615234375,
      "completions/mean_terminated_length": 548.2855834960938,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 2.615923009623797,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0013,
      "num_tokens": 162631942.0,
      "reward": 0.515625,
      "reward_std": 0.2096005529165268,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 280
    },
    {
      "clip_ratio/high_max": 0.001631348744922434,
      "clip_ratio/high_mean": 0.0004491139058018234,
      "clip_ratio/low_mean": 0.0002772787868252635,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007263926991072367,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3838.0,
      "completions/mean_length": 571.825927734375,
      "completions/mean_terminated_length": 515.8866577148438,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 2.625255176436279,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0104,
      "num_tokens": 163167874.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.21635474264621735,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.4846802353858948,
      "step": 281
    },
    {
      "clip_ratio/high_max": 0.0016738859203542233,
      "clip_ratio/high_mean": 0.0005313563602840077,
      "clip_ratio/low_mean": 0.00029245962582535867,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008238159780376009,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2468.0,
      "completions/mean_length": 576.1864013671875,
      "completions/mean_terminated_length": 532.4373168945312,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 2.6345873432487608,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0056,
      "num_tokens": 163723745.0,
      "reward": 0.613839328289032,
      "reward_std": 0.21819807589054108,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 282
    },
    {
      "clip_ratio/high_max": 0.0019118020027235616,
      "clip_ratio/high_mean": 0.0005473132416682347,
      "clip_ratio/low_mean": 0.0004529294806161488,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010002427243307466,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3604.0,
      "completions/mean_length": 578.46875,
      "completions/mean_terminated_length": 526.6817626953125,
      "completions/min_length": 77.0,
      "completions/min_terminated_length": 77.0,
      "epoch": 2.6439195100612425,
      "grad_norm": 0.1513671875,
      "learning_rate": 1e-06,
      "loss": 0.0039,
      "num_tokens": 164269317.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.2469463050365448,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 283
    },
    {
      "clip_ratio/high_max": 0.0017921685775945662,
      "clip_ratio/high_mean": 0.0006251538120523037,
      "clip_ratio/low_mean": 0.0003910708369403437,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001016224638078711,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3993.0,
      "completions/mean_length": 572.0592041015625,
      "completions/mean_terminated_length": 520.1777954101562,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 2.653251676873724,
      "grad_norm": 0.146484375,
      "learning_rate": 1e-06,
      "loss": 0.0088,
      "num_tokens": 164823890.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.2508198022842407,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.4907552897930145,
      "step": 284
    },
    {
      "clip_ratio/high_max": 0.0013512569712474942,
      "clip_ratio/high_mean": 0.00037087427836013376,
      "clip_ratio/low_mean": 0.00030325797808927746,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006741322567904717,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2597.0,
      "completions/mean_length": 607.693115234375,
      "completions/mean_terminated_length": 560.3405151367188,
      "completions/min_length": 75.0,
      "completions/min_terminated_length": 75.0,
      "epoch": 2.662583843686206,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": 0.0007,
      "num_tokens": 165408799.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.1853991150856018,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4982811510562897,
      "step": 285
    },
    {
      "clip_ratio/high_max": 0.0012537253724076436,
      "clip_ratio/high_mean": 0.0003354836278504081,
      "clip_ratio/low_mean": 0.00029683948127967597,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006323231064015999,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3307.0,
      "completions/mean_length": 684.5391235351562,
      "completions/mean_terminated_length": 602.6640014648438,
      "completions/min_length": 94.0,
      "completions/min_terminated_length": 94.0,
      "epoch": 2.6719160104986877,
      "grad_norm": 0.11962890625,
      "learning_rate": 1e-06,
      "loss": -0.0023,
      "num_tokens": 166025090.0,
      "reward": 0.4285714626312256,
      "reward_std": 0.2080891877412796,
      "rewards/verify_math_reward/mean": 0.4285714328289032,
      "rewards/verify_math_reward/std": 0.49514806270599365,
      "step": 286
    },
    {
      "clip_ratio/high_max": 0.0015210921355901519,
      "clip_ratio/high_mean": 0.0004264088308900682,
      "clip_ratio/low_mean": 0.0003292609922027623,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007556698219559621,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3677.0,
      "completions/mean_length": 643.9553833007812,
      "completions/mean_terminated_length": 573.1845092773438,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 2.6812481773111694,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0093,
      "num_tokens": 166622962.0,
      "reward": 0.4988839626312256,
      "reward_std": 0.23044700920581818,
      "rewards/verify_math_reward/mean": 0.4988839328289032,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 287
    },
    {
      "clip_ratio/high_max": 0.0014823366091150092,
      "clip_ratio/high_mean": 0.000417918061657474,
      "clip_ratio/low_mean": 0.0003382189736385044,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007561370305211312,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4003.0,
      "completions/mean_length": 620.40625,
      "completions/mean_terminated_length": 581.1783447265625,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 2.690580344123651,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0053,
      "num_tokens": 167222734.0,
      "reward": 0.5379464626312256,
      "reward_std": 0.20869651436805725,
      "rewards/verify_math_reward/mean": 0.5379464030265808,
      "rewards/verify_math_reward/std": 0.4988364577293396,
      "step": 288
    },
    {
      "clip_ratio/high_max": 0.0015053520455694525,
      "clip_ratio/high_mean": 0.00043842528748427867,
      "clip_ratio/low_mean": 0.0003198034838760577,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007582287707919022,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2497.0,
      "completions/mean_length": 627.0245971679688,
      "completions/mean_terminated_length": 567.96142578125,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 2.699912510936133,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0093,
      "num_tokens": 167807396.0,
      "reward": 0.5022321939468384,
      "reward_std": 0.22823528945446014,
      "rewards/verify_math_reward/mean": 0.5022321343421936,
      "rewards/verify_math_reward/std": 0.5002743005752563,
      "step": 289
    },
    {
      "clip_ratio/high_max": 0.0016437880767625757,
      "clip_ratio/high_mean": 0.0005367208000279788,
      "clip_ratio/low_mean": 0.0003510263488806231,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008877471500454703,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2576.0,
      "completions/mean_length": 576.654052734375,
      "completions/mean_terminated_length": 532.9107666015625,
      "completions/min_length": 97.0,
      "completions/min_terminated_length": 97.0,
      "epoch": 2.7092446777486145,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0043,
      "num_tokens": 168366470.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.23623982071876526,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321895837783813,
      "step": 290
    },
    {
      "clip_ratio/high_max": 0.0017529187853142503,
      "clip_ratio/high_mean": 0.00048686784452911525,
      "clip_ratio/low_mean": 0.00035784269800842594,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008447105346931494,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2241.0,
      "completions/mean_length": 580.7421875,
      "completions/mean_terminated_length": 553.06298828125,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 2.7185768445610963,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": -0.0033,
      "num_tokens": 168948439.0,
      "reward": 0.559151828289032,
      "reward_std": 0.20779791474342346,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 291
    },
    {
      "clip_ratio/high_max": 0.0014450591979766614,
      "clip_ratio/high_mean": 0.00043080291516162106,
      "clip_ratio/low_mean": 0.0003529568850808573,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007837598013793468,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3306.0,
      "completions/mean_length": 639.0078125,
      "completions/mean_terminated_length": 556.0399780273438,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 2.7279090113735784,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": -0.0169,
      "num_tokens": 169537342.0,
      "reward": 0.4888392984867096,
      "reward_std": 0.23206281661987305,
      "rewards/verify_math_reward/mean": 0.4888392984867096,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 292
    },
    {
      "clip_ratio/high_max": 0.0015294742570404196,
      "clip_ratio/high_mean": 0.0005287959324959957,
      "clip_ratio/low_mean": 0.000296823709390992,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008256196379079483,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3879.0,
      "completions/mean_length": 701.5647583007812,
      "completions/mean_terminated_length": 604.1354370117188,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 2.73724117818606,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0048,
      "num_tokens": 170154504.0,
      "reward": 0.5055803656578064,
      "reward_std": 0.2500934600830078,
      "rewards/verify_math_reward/mean": 0.5055803656578064,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 293
    },
    {
      "clip_ratio/high_max": 0.0019843373520416208,
      "clip_ratio/high_mean": 0.0005925050747919158,
      "clip_ratio/low_mean": 0.00025544197194449225,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008479470420752477,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4007.0,
      "completions/mean_length": 610.2779541015625,
      "completions/mean_terminated_length": 550.9296264648438,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "epoch": 2.746573344998542,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0038,
      "num_tokens": 170740705.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.22233231365680695,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 294
    },
    {
      "clip_ratio/high_max": 0.0014366603463713545,
      "clip_ratio/high_mean": 0.0004662901551455434,
      "clip_ratio/low_mean": 0.00031543318300464307,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000781723333602713,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3181.0,
      "completions/mean_length": 591.6484375,
      "completions/mean_terminated_length": 548.091552734375,
      "completions/min_length": 55.0,
      "completions/min_terminated_length": 55.0,
      "epoch": 2.7559055118110236,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": 0.0062,
      "num_tokens": 171311686.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.22834154963493347,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 295
    },
    {
      "clip_ratio/high_max": 0.0017532859492348507,
      "clip_ratio/high_mean": 0.0005667617406288628,
      "clip_ratio/low_mean": 0.0003789789525399101,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009457406877118046,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3122.0,
      "completions/mean_length": 601.7902221679688,
      "completions/mean_terminated_length": 562.3521728515625,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "epoch": 2.7652376786235053,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0161,
      "num_tokens": 171898594.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.25690504908561707,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763562679291,
      "step": 296
    },
    {
      "clip_ratio/high_max": 0.0013215181261330144,
      "clip_ratio/high_mean": 0.00039051591647876194,
      "clip_ratio/low_mean": 0.00028563219234456483,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006761481117791845,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3446.0,
      "completions/mean_length": 607.9944458007812,
      "completions/mean_terminated_length": 560.6459350585938,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 2.774569845435987,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.009,
      "num_tokens": 172482493.0,
      "reward": 0.494419664144516,
      "reward_std": 0.2008771300315857,
      "rewards/verify_math_reward/mean": 0.4944196343421936,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 297
    },
    {
      "clip_ratio/high_max": 0.0014070765564611065,
      "clip_ratio/high_mean": 0.0004336246713592118,
      "clip_ratio/low_mean": 0.00030252068540903565,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007361453563135001,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2354.0,
      "completions/mean_length": 580.5770263671875,
      "completions/mean_terminated_length": 540.8995361328125,
      "completions/min_length": 65.0,
      "completions/min_terminated_length": 65.0,
      "epoch": 2.783902012248469,
      "grad_norm": 0.11572265625,
      "learning_rate": 1e-06,
      "loss": -0.0086,
      "num_tokens": 173054002.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.20038999617099762,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 298
    },
    {
      "clip_ratio/high_max": 0.0015851540683797793,
      "clip_ratio/high_mean": 0.0005031180047581074,
      "clip_ratio/low_mean": 0.0003453926424299425,
      "clip_ratio/low_min": 8.398280442634132e-06,
      "clip_ratio/region_mean": 0.0008485106491207262,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1963.0,
      "completions/mean_length": 601.7824096679688,
      "completions/mean_terminated_length": 542.2894897460938,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 2.793234179060951,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": 0.0011,
      "num_tokens": 173619591.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.23067525029182434,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 299
    },
    {
      "clip_ratio/high_max": 0.0016521747693332145,
      "clip_ratio/high_mean": 0.0004650787705031689,
      "clip_ratio/low_mean": 0.0003833997711808479,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008484785412292695,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3692.0,
      "completions/mean_length": 659.65625,
      "completions/mean_terminated_length": 593.19677734375,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 2.8025663458734327,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0187,
      "num_tokens": 174224971.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.2390919178724289,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 300
    },
    {
      "clip_ratio/high_max": 0.001523267292213859,
      "clip_ratio/high_mean": 0.0004439122083113034,
      "clip_ratio/low_mean": 0.00029721465909915423,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007411268616124289,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2847.0,
      "completions/mean_length": 612.0513916015625,
      "completions/mean_terminated_length": 536.5723876953125,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 2.8118985126859144,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": 0.0033,
      "num_tokens": 174776193.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.20902322232723236,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 301
    },
    {
      "clip_ratio/high_max": 0.0013422923657344654,
      "clip_ratio/high_mean": 0.00042776693385349063,
      "clip_ratio/low_mean": 0.0003904350706989135,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008182020037565962,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2936.0,
      "completions/mean_length": 616.65625,
      "completions/mean_terminated_length": 573.41015625,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 2.821230679498396,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0149,
      "num_tokens": 175367349.0,
      "reward": 0.5,
      "reward_std": 0.21286533772945404,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5002792477607727,
      "step": 302
    },
    {
      "clip_ratio/high_max": 0.0016865780526131857,
      "clip_ratio/high_mean": 0.0005325157746938203,
      "clip_ratio/low_mean": 0.0002991496540971639,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008316654452755756,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2618.0,
      "completions/mean_length": 659.0714721679688,
      "completions/mean_terminated_length": 600.553955078125,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 2.830562846310878,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": -0.0035,
      "num_tokens": 175986301.0,
      "reward": 0.520089328289032,
      "reward_std": 0.2272149920463562,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 303
    },
    {
      "clip_ratio/high_max": 0.0018782893257593969,
      "clip_ratio/high_mean": 0.0005835897909491905,
      "clip_ratio/low_mean": 0.0004582951835345739,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010418849533380126,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3538.0,
      "completions/mean_length": 611.466552734375,
      "completions/mean_terminated_length": 560.1653442382812,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 2.8398950131233596,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": -0.0037,
      "num_tokens": 176571951.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.2577284872531891,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 304
    },
    {
      "clip_ratio/high_max": 0.001666442349232966,
      "clip_ratio/high_mean": 0.0005039190423303808,
      "clip_ratio/low_mean": 0.00028067684303323404,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007845958907637396,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3629.0,
      "completions/mean_length": 607.333740234375,
      "completions/mean_terminated_length": 547.935302734375,
      "completions/min_length": 73.0,
      "completions/min_terminated_length": 73.0,
      "epoch": 2.8492271799358413,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": -0.0075,
      "num_tokens": 177153946.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.2236124873161316,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4982811510562897,
      "step": 305
    },
    {
      "clip_ratio/high_max": 0.0017134395202447195,
      "clip_ratio/high_mean": 0.0004923922685975413,
      "clip_ratio/low_mean": 0.0003737313411420473,
      "clip_ratio/low_min": 1.3075314200250432e-05,
      "clip_ratio/region_mean": 0.0008661236079205992,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2530.0,
      "completions/mean_length": 589.0379638671875,
      "completions/mean_terminated_length": 529.3280639648438,
      "completions/min_length": 61.0,
      "completions/min_terminated_length": 61.0,
      "epoch": 2.858559346748323,
      "grad_norm": 0.150390625,
      "learning_rate": 1e-06,
      "loss": -0.0043,
      "num_tokens": 177703420.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.2413061559200287,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 306
    },
    {
      "clip_ratio/high_max": 0.0014759283094463171,
      "clip_ratio/high_mean": 0.0004467450282845675,
      "clip_ratio/low_mean": 0.00032371382712881314,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007704588579144911,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3726.0,
      "completions/mean_length": 579.2489013671875,
      "completions/mean_terminated_length": 543.56591796875,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 2.8678915135608047,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 178269123.0,
      "reward": 0.512276828289032,
      "reward_std": 0.2036968618631363,
      "rewards/verify_math_reward/mean": 0.5122767686843872,
      "rewards/verify_math_reward/std": 0.500128448009491,
      "step": 307
    },
    {
      "clip_ratio/high_max": 0.00149100732687657,
      "clip_ratio/high_mean": 0.00047716773906358867,
      "clip_ratio/low_mean": 0.0002795190918050139,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007566868334833998,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2938.0,
      "completions/mean_length": 575.779052734375,
      "completions/mean_terminated_length": 532.0248413085938,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 2.8772236803732865,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0076,
      "num_tokens": 178826773.0,
      "reward": 0.5859375,
      "reward_std": 0.20287089049816132,
      "rewards/verify_math_reward/mean": 0.5859375,
      "rewards/verify_math_reward/std": 0.4928344786167145,
      "step": 308
    },
    {
      "clip_ratio/high_max": 0.0019092849361186381,
      "clip_ratio/high_mean": 0.0005840047251695069,
      "clip_ratio/low_mean": 0.000348398102801184,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009324028178525623,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2049.0,
      "completions/mean_length": 591.1138916015625,
      "completions/mean_terminated_length": 539.5130004882812,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 2.886555847185768,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": -0.0027,
      "num_tokens": 179390299.0,
      "reward": 0.5022321939468384,
      "reward_std": 0.23154105246067047,
      "rewards/verify_math_reward/mean": 0.5022321343421936,
      "rewards/verify_math_reward/std": 0.5002743005752563,
      "step": 309
    },
    {
      "clip_ratio/high_max": 0.0016030447877710685,
      "clip_ratio/high_mean": 0.0005661971076733607,
      "clip_ratio/low_mean": 0.00038362684472303954,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009498239314780221,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3257.0,
      "completions/mean_length": 623.732177734375,
      "completions/mean_terminated_length": 592.450439453125,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 2.8958880139982504,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0087,
      "num_tokens": 180001731.0,
      "reward": 0.4810267984867096,
      "reward_std": 0.2676451504230499,
      "rewards/verify_math_reward/mean": 0.4810267984867096,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 310
    },
    {
      "clip_ratio/high_max": 0.0015500138479183079,
      "clip_ratio/high_mean": 0.0004708632347956154,
      "clip_ratio/low_mean": 0.00030234170117182657,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007732049239166372,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3854.0,
      "completions/mean_length": 570.9799194335938,
      "completions/mean_terminated_length": 531.1941528320312,
      "completions/min_length": 27.0,
      "completions/min_terminated_length": 27.0,
      "epoch": 2.905220180810732,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0169,
      "num_tokens": 180571553.0,
      "reward": 0.606026828289032,
      "reward_std": 0.20308955013751984,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890191316604614,
      "step": 311
    },
    {
      "clip_ratio/high_max": 0.0016935139556153445,
      "clip_ratio/high_mean": 0.0004774006470142922,
      "clip_ratio/low_mean": 0.00027716780800801644,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007545684447904932,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1988.0,
      "completions/mean_length": 585.8671875,
      "completions/mean_terminated_length": 546.2494506835938,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 2.914552347623214,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0118,
      "num_tokens": 181145874.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.2156084030866623,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 312
    },
    {
      "clip_ratio/high_max": 0.0015024951590021374,
      "clip_ratio/high_mean": 0.0003828913951338109,
      "clip_ratio/low_mean": 0.00031090754453089176,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006937989546713652,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4087.0,
      "completions/mean_length": 664.521240234375,
      "completions/mean_terminated_length": 598.1558227539062,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 2.9238845144356955,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": -0.0064,
      "num_tokens": 181765309.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.1989218294620514,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 313
    },
    {
      "clip_ratio/high_max": 0.001601365249371156,
      "clip_ratio/high_mean": 0.000455309519338698,
      "clip_ratio/low_mean": 0.00038884761158897163,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008441571353614563,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3978.0,
      "completions/mean_length": 650.693115234375,
      "completions/mean_terminated_length": 592.032958984375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "epoch": 2.9332166812481772,
      "grad_norm": 0.1484375,
      "learning_rate": 1e-06,
      "loss": 0.0047,
      "num_tokens": 182376362.0,
      "reward": 0.4799107313156128,
      "reward_std": 0.25197917222976685,
      "rewards/verify_math_reward/mean": 0.4799107015132904,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 314
    },
    {
      "clip_ratio/high_max": 0.0015134224504436133,
      "clip_ratio/high_mean": 0.0004907421830466774,
      "clip_ratio/low_mean": 0.00037144607688333053,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008621882589068264,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2644.0,
      "completions/mean_length": 599.0692138671875,
      "completions/mean_terminated_length": 543.5623779296875,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 2.942548848060659,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": 0.0087,
      "num_tokens": 182950496.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.23799677193164825,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 315
    },
    {
      "clip_ratio/high_max": 0.0018595209185150452,
      "clip_ratio/high_mean": 0.0005672425918419322,
      "clip_ratio/low_mean": 0.0004028926704222613,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009701352637421223,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3730.0,
      "completions/mean_length": 561.0502319335938,
      "completions/mean_terminated_length": 533.2160034179688,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 2.9518810148731407,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": 0.0041,
      "num_tokens": 183519597.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.23755210638046265,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 316
    },
    {
      "clip_ratio/high_max": 0.0015648174157831818,
      "clip_ratio/high_mean": 0.000499258840022776,
      "clip_ratio/low_mean": 0.0003862829264562606,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008855417518134345,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4018.0,
      "completions/mean_length": 655.7176513671875,
      "completions/mean_terminated_length": 601.1099853515625,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 2.961213181685623,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0054,
      "num_tokens": 184137088.0,
      "reward": 0.4743303656578064,
      "reward_std": 0.2368360310792923,
      "rewards/verify_math_reward/mean": 0.4743303656578064,
      "rewards/verify_math_reward/std": 0.4996195137500763,
      "step": 317
    },
    {
      "clip_ratio/high_max": 0.001962169610123965,
      "clip_ratio/high_mean": 0.0005945045068074251,
      "clip_ratio/low_mean": 0.00036597634982626914,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009604808565200074,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2564.0,
      "completions/mean_length": 570.4230346679688,
      "completions/mean_terminated_length": 522.5645141601562,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 2.9705453484981046,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": -0.0053,
      "num_tokens": 184688187.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.2202625721693039,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 318
    },
    {
      "clip_ratio/high_max": 0.0014619004105043132,
      "clip_ratio/high_mean": 0.0004460722163912578,
      "clip_ratio/low_mean": 0.00025319012502222904,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006992623475525761,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3325.0,
      "completions/mean_length": 603.9296875,
      "completions/mean_terminated_length": 548.5,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 2.9798775153105863,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": -0.0156,
      "num_tokens": 185261748.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.20369574427604675,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796950817108154,
      "step": 319
    },
    {
      "clip_ratio/high_max": 0.001867756514911889,
      "clip_ratio/high_mean": 0.000636332901649439,
      "clip_ratio/low_mean": 0.00027814046939056425,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009144733539869776,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3300.0,
      "completions/mean_length": 596.3292846679688,
      "completions/mean_terminated_length": 552.8305053710938,
      "completions/min_length": 98.0,
      "completions/min_terminated_length": 98.0,
      "epoch": 2.989209682123068,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0067,
      "num_tokens": 185847707.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.24923540651798248,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321895837783813,
      "step": 320
    },
    {
      "clip_ratio/high_max": 0.0017106063587561948,
      "clip_ratio/high_mean": 0.000480181593047746,
      "clip_ratio/low_mean": 0.00027902893793907424,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007592105284857098,
      "completions/clipped_ratio": 0.011363636363636354,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2428.0,
      "completions/mean_length": 591.7415161132812,
      "completions/mean_terminated_length": 551.462646484375,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 2.9985418489355498,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0014,
      "num_tokens": 186435154.0,
      "reward": 0.582589328289032,
      "reward_std": 0.21676844358444214,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.4934072494506836,
      "step": 321
    },
    {
      "clip_ratio/high_max": 0.0014145847071631579,
      "clip_ratio/high_mean": 0.00041427204268984497,
      "clip_ratio/low_mean": 0.00032360194654756924,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007378739865089301,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2879.0,
      "completions/mean_length": 694.2511596679688,
      "completions/mean_terminated_length": 632.401123046875,
      "completions/min_length": 44.0,
      "completions/min_terminated_length": 44.0,
      "epoch": 3.0093321668124817,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0156,
      "num_tokens": 187081955.0,
      "reward": 0.4564732313156128,
      "reward_std": 0.2112138420343399,
      "rewards/verify_math_reward/mean": 0.4564732015132904,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 322
    },
    {
      "clip_ratio/high_max": 0.001394630447975942,
      "clip_ratio/high_mean": 0.00043612389333702595,
      "clip_ratio/low_mean": 0.00036425040411813825,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008003742923392565,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3180.0,
      "completions/mean_length": 638.3192138671875,
      "completions/mean_terminated_length": 599.29345703125,
      "completions/min_length": 31.0,
      "completions/min_terminated_length": 31.0,
      "epoch": 3.0186643336249634,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.0028,
      "num_tokens": 187695425.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.24562332034111023,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 323
    },
    {
      "clip_ratio/high_max": 0.0017467351999584935,
      "clip_ratio/high_mean": 0.0005857529299646558,
      "clip_ratio/low_mean": 0.00039468116983698565,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009804340907066944,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3849.0,
      "completions/mean_length": 589.4152221679688,
      "completions/mean_terminated_length": 565.7752685546875,
      "completions/min_length": 75.0,
      "completions/min_terminated_length": 75.0,
      "epoch": 3.027996500437445,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0039,
      "num_tokens": 188288629.0,
      "reward": 0.5022321939468384,
      "reward_std": 0.25806480646133423,
      "rewards/verify_math_reward/mean": 0.5022321343421936,
      "rewards/verify_math_reward/std": 0.5002742409706116,
      "step": 324
    },
    {
      "clip_ratio/high_max": 0.0014759719633730128,
      "clip_ratio/high_mean": 0.00043675317124325375,
      "clip_ratio/low_mean": 0.0002437118456555254,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006804650251979183,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2493.0,
      "completions/mean_length": 593.7053833007812,
      "completions/mean_terminated_length": 542.1427001953125,
      "completions/min_length": 77.0,
      "completions/min_terminated_length": 77.0,
      "epoch": 3.037328667249927,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": -0.0028,
      "num_tokens": 188857933.0,
      "reward": 0.543526828289032,
      "reward_std": 0.19704709947109222,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 325
    },
    {
      "clip_ratio/high_max": 0.0016783677301646094,
      "clip_ratio/high_mean": 0.0005184165333957935,
      "clip_ratio/low_mean": 0.00023275836247194093,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007511748958677344,
      "completions/clipped_ratio": 0.0022321428571429047,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2205.0,
      "completions/mean_length": 545.5535888671875,
      "completions/mean_terminated_length": 537.6107177734375,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 3.046660834062409,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0005,
      "num_tokens": 189418821.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.21091073751449585,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161848425865173,
      "step": 326
    },
    {
      "clip_ratio/high_max": 0.0015624871757609071,
      "clip_ratio/high_mean": 0.0004476380358937604,
      "clip_ratio/low_mean": 0.00041369982841388264,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008613378604422905,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3347.0,
      "completions/mean_length": 631.15625,
      "completions/mean_terminated_length": 580.1449584960938,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 3.055993000874891,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0063,
      "num_tokens": 190023241.0,
      "reward": 0.543526828289032,
      "reward_std": 0.22337539494037628,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 327
    },
    {
      "clip_ratio/high_max": 0.0014935850049369037,
      "clip_ratio/high_mean": 0.0004928659504912503,
      "clip_ratio/low_mean": 0.0002954453170787019,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007883112648414681,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3430.0,
      "completions/mean_length": 646.3527221679688,
      "completions/mean_terminated_length": 551.408203125,
      "completions/min_length": 60.0,
      "completions/min_terminated_length": 60.0,
      "epoch": 3.0653251676873725,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0041,
      "num_tokens": 190596821.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.21553170680999756,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 328
    },
    {
      "clip_ratio/high_max": 0.0013526212587748887,
      "clip_ratio/high_mean": 0.00037775289058572525,
      "clip_ratio/low_mean": 0.00024532223949336185,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000623075127350603,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4073.0,
      "completions/mean_length": 570.247802734375,
      "completions/mean_terminated_length": 530.4537353515625,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 3.0746573344998542,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 191157611.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.19223138689994812,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 329
    },
    {
      "clip_ratio/high_max": 0.001386022498991224,
      "clip_ratio/high_mean": 0.00037963793920425815,
      "clip_ratio/low_mean": 0.00035208107738071703,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000731719011128007,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2418.0,
      "completions/mean_length": 642.7254638671875,
      "completions/mean_terminated_length": 575.9385375976562,
      "completions/min_length": 36.0,
      "completions/min_terminated_length": 36.0,
      "epoch": 3.083989501312336,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0036,
      "num_tokens": 191759597.0,
      "reward": 0.4676339626312256,
      "reward_std": 0.21019576489925385,
      "rewards/verify_math_reward/mean": 0.4676339328289032,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 330
    },
    {
      "clip_ratio/high_max": 0.0016220136021729559,
      "clip_ratio/high_mean": 0.000503526099919327,
      "clip_ratio/low_mean": 0.0004214132576407792,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009249393442587461,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2543.0,
      "completions/mean_length": 606.6038208007812,
      "completions/mean_terminated_length": 539.1182861328125,
      "completions/min_length": 38.0,
      "completions/min_terminated_length": 38.0,
      "epoch": 3.0933216681248177,
      "grad_norm": 0.1455078125,
      "learning_rate": 1e-06,
      "loss": 0.023,
      "num_tokens": 192320258.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.2525743842124939,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.49702703952789307,
      "step": 331
    },
    {
      "clip_ratio/high_max": 0.0015725383964309003,
      "clip_ratio/high_mean": 0.0004890870800409175,
      "clip_ratio/low_mean": 0.0002551917355049227,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007442788146363455,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3548.0,
      "completions/mean_length": 590.8939819335938,
      "completions/mean_terminated_length": 547.3276977539062,
      "completions/min_length": 52.0,
      "completions/min_terminated_length": 52.0,
      "epoch": 3.1026538349372994,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": -0.0035,
      "num_tokens": 192891859.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.19257839024066925,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 332
    },
    {
      "clip_ratio/high_max": 0.0016490092166350223,
      "clip_ratio/high_mean": 0.00044072518517168646,
      "clip_ratio/low_mean": 0.0002751491902017733,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007158743751460861,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3858.0,
      "completions/mean_length": 568.625,
      "completions/mean_terminated_length": 508.56756591796875,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "epoch": 3.111986001749781,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0062,
      "num_tokens": 193435235.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.1978398710489273,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.4884119927883148,
      "step": 333
    },
    {
      "clip_ratio/high_max": 0.001815153741517861,
      "clip_ratio/high_mean": 0.0004776723098984803,
      "clip_ratio/low_mean": 0.00034385452943297423,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008215268439926149,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2269.0,
      "completions/mean_length": 628.724365234375,
      "completions/mean_terminated_length": 557.6412353515625,
      "completions/min_length": 49.0,
      "completions/min_terminated_length": 49.0,
      "epoch": 3.121318168562263,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.008,
      "num_tokens": 194025676.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.20440296828746796,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796950817108154,
      "step": 334
    },
    {
      "clip_ratio/high_max": 0.001681742455730273,
      "clip_ratio/high_mean": 0.0004999326515644498,
      "clip_ratio/low_mean": 0.0003150533422058288,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008149859904733603,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3684.0,
      "completions/mean_length": 553.552490234375,
      "completions/mean_terminated_length": 505.4649658203125,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 3.130650335374745,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0122,
      "num_tokens": 194554051.0,
      "reward": 0.606026828289032,
      "reward_std": 0.2215416580438614,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890191316604614,
      "step": 335
    },
    {
      "clip_ratio/high_max": 0.0015570206323900493,
      "clip_ratio/high_mean": 0.00044933508752365015,
      "clip_ratio/low_mean": 0.00035593529878497066,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000805270383352763,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2289.0,
      "completions/mean_length": 592.3214721679688,
      "completions/mean_terminated_length": 564.7334594726562,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 3.1399825021872267,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 195146435.0,
      "reward": 0.486607164144516,
      "reward_std": 0.22462141513824463,
      "rewards/verify_math_reward/mean": 0.4866071343421936,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 336
    },
    {
      "clip_ratio/high_max": 0.001941438624271541,
      "clip_ratio/high_mean": 0.0005635270540551574,
      "clip_ratio/low_mean": 0.0002845019428150408,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008480289925500983,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2034.0,
      "completions/mean_length": 568.0357666015625,
      "completions/mean_terminated_length": 507.9682312011719,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 3.1493146689997085,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": -0.0014,
      "num_tokens": 195681259.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.2012576162815094,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 337
    },
    {
      "clip_ratio/high_max": 0.001428396155461087,
      "clip_ratio/high_mean": 0.0004483592942960968,
      "clip_ratio/low_mean": 0.0003310731822239177,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007794324692440568,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3295.0,
      "completions/mean_length": 707.8125610351562,
      "completions/mean_terminated_length": 618.5475463867188,
      "completions/min_length": 97.0,
      "completions/min_terminated_length": 97.0,
      "epoch": 3.15864683581219,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0008,
      "num_tokens": 196335835.0,
      "reward": 0.4386160969734192,
      "reward_std": 0.1998247504234314,
      "rewards/verify_math_reward/mean": 0.4386160671710968,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 338
    },
    {
      "clip_ratio/high_max": 0.0014731534920429112,
      "clip_ratio/high_mean": 0.0004133693425956153,
      "clip_ratio/low_mean": 0.0002422568936708558,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006556262449066708,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4068.0,
      "completions/mean_length": 638.1674194335938,
      "completions/mean_terminated_length": 591.228515625,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 3.167979002624672,
      "grad_norm": 0.11181640625,
      "learning_rate": 1e-06,
      "loss": 0.0063,
      "num_tokens": 196951009.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.19087491929531097,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 339
    },
    {
      "clip_ratio/high_max": 0.0015800321816641372,
      "clip_ratio/high_mean": 0.0004552013997454196,
      "clip_ratio/low_mean": 0.00030304992742458126,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007582513308079797,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2668.0,
      "completions/mean_length": 634.8002319335938,
      "completions/mean_terminated_length": 563.8417358398438,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 3.1773111694371536,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0116,
      "num_tokens": 197530958.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.21887320280075073,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263837933540344,
      "step": 340
    },
    {
      "clip_ratio/high_max": 0.0016613913776382105,
      "clip_ratio/high_mean": 0.0005399099186433887,
      "clip_ratio/low_mean": 0.00029779120700368367,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008377011199627304,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2085.0,
      "completions/mean_length": 605.421875,
      "completions/mean_terminated_length": 554.0316772460938,
      "completions/min_length": 42.0,
      "completions/min_terminated_length": 42.0,
      "epoch": 3.1866433362496354,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0059,
      "num_tokens": 198102840.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.2239074409008026,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 341
    },
    {
      "clip_ratio/high_max": 0.0015391380329674575,
      "clip_ratio/high_mean": 0.00045375877505193785,
      "clip_ratio/low_mean": 0.0003501511055219453,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008039098701146941,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3684.0,
      "completions/mean_length": 662.763427734375,
      "completions/mean_terminated_length": 588.3831176757812,
      "completions/min_length": 44.0,
      "completions/min_terminated_length": 44.0,
      "epoch": 3.195975503062117,
      "grad_norm": 0.11865234375,
      "learning_rate": 1e-06,
      "loss": 0.0004,
      "num_tokens": 198712324.0,
      "reward": 0.4765625298023224,
      "reward_std": 0.2266170084476471,
      "rewards/verify_math_reward/mean": 0.4765625,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 342
    },
    {
      "clip_ratio/high_max": 0.0016844553128976258,
      "clip_ratio/high_mean": 0.0005326514890953149,
      "clip_ratio/low_mean": 0.0004135742810831289,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000946225770348974,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3723.0,
      "completions/mean_length": 651.4006958007812,
      "completions/mean_terminated_length": 564.6944580078125,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 3.205307669874599,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0026,
      "num_tokens": 199291971.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.2266169786453247,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 343
    },
    {
      "clip_ratio/high_max": 0.0014262949243857292,
      "clip_ratio/high_mean": 0.0004605766582699289,
      "clip_ratio/low_mean": 0.0003218647548237641,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007824414187780349,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2371.0,
      "completions/mean_length": 605.4564819335938,
      "completions/mean_terminated_length": 570.0394287109375,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 3.214639836687081,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0025,
      "num_tokens": 199886156.0,
      "reward": 0.4799107313156128,
      "reward_std": 0.22515344619750977,
      "rewards/verify_math_reward/mean": 0.4799107015132904,
      "rewards/verify_math_reward/std": 0.4998752772808075,
      "step": 344
    },
    {
      "clip_ratio/high_max": 0.0017159509225166403,
      "clip_ratio/high_mean": 0.00045555809583675,
      "clip_ratio/low_mean": 0.00033502772180327156,
      "clip_ratio/low_min": 1.3053467228019144e-05,
      "clip_ratio/region_mean": 0.0007905858237791108,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3325.0,
      "completions/mean_length": 657.5770263671875,
      "completions/mean_terminated_length": 610.901611328125,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 3.2239720034995627,
      "grad_norm": 0.115234375,
      "learning_rate": 1e-06,
      "loss": 0.0012,
      "num_tokens": 200520977.0,
      "reward": 0.4955357313156128,
      "reward_std": 0.2032090127468109,
      "rewards/verify_math_reward/mean": 0.4955357015132904,
      "rewards/verify_math_reward/std": 0.500259280204773,
      "step": 345
    },
    {
      "clip_ratio/high_max": 0.0014997234084148658,
      "clip_ratio/high_mean": 0.00043576945154200075,
      "clip_ratio/low_mean": 0.0002325831610505702,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006683526182769128,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4005.0,
      "completions/mean_length": 603.0859375,
      "completions/mean_terminated_length": 547.6428833007812,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 3.2333041703120444,
      "grad_norm": 0.1103515625,
      "learning_rate": 1e-06,
      "loss": -0.0022,
      "num_tokens": 201087950.0,
      "reward": 0.625,
      "reward_std": 0.18663331866264343,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 346
    },
    {
      "clip_ratio/high_max": 0.0015245356153172906,
      "clip_ratio/high_mean": 0.0005462753662186515,
      "clip_ratio/low_mean": 0.0003204307129180961,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008667060842526553,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2766.0,
      "completions/mean_length": 591.3772583007812,
      "completions/mean_terminated_length": 547.8169555664062,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 3.242636337124526,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.003,
      "num_tokens": 201664000.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.21154901385307312,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 347
    },
    {
      "clip_ratio/high_max": 0.0017170511309814174,
      "clip_ratio/high_mean": 0.0005302536926592438,
      "clip_ratio/low_mean": 0.00031125928774144995,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008415129768764018,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3363.0,
      "completions/mean_length": 576.482177734375,
      "completions/mean_terminated_length": 536.7584838867188,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 3.251968503937008,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": 0.0122,
      "num_tokens": 202230768.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.2273990511894226,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973994791507721,
      "step": 348
    },
    {
      "clip_ratio/high_max": 0.0014262903450799058,
      "clip_ratio/high_mean": 0.0004359676765943732,
      "clip_ratio/low_mean": 0.0002852419337386891,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007212096061266493,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3873.0,
      "completions/mean_length": 720.9888916015625,
      "completions/mean_terminated_length": 624.1170654296875,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 3.2613006707494896,
      "grad_norm": 0.11669921875,
      "learning_rate": 1e-06,
      "loss": 0.0132,
      "num_tokens": 202869222.0,
      "reward": 0.424107164144516,
      "reward_std": 0.21331927180290222,
      "rewards/verify_math_reward/mean": 0.4241071343421936,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 349
    },
    {
      "clip_ratio/high_max": 0.0014594297417716007,
      "clip_ratio/high_mean": 0.0003818393024062061,
      "clip_ratio/low_mean": 0.00033925501043086115,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007210943153950211,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3572.0,
      "completions/mean_length": 605.5770263671875,
      "completions/mean_terminated_length": 562.1932373046875,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 3.2706328375619713,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0054,
      "num_tokens": 203459227.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.21271197497844696,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 350
    },
    {
      "clip_ratio/high_max": 0.0013140371429471998,
      "clip_ratio/high_mean": 0.0003632106477198249,
      "clip_ratio/low_mean": 0.0003321894366763445,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006954000928089954,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2725.0,
      "completions/mean_length": 612.2332763671875,
      "completions/mean_terminated_length": 560.943359375,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 3.279965004374453,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0024,
      "num_tokens": 204041196.0,
      "reward": 0.5,
      "reward_std": 0.1934378445148468,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5002792477607727,
      "step": 351
    },
    {
      "clip_ratio/high_max": 0.0015405017084049177,
      "clip_ratio/high_mean": 0.0004661469188249612,
      "clip_ratio/low_mean": 0.00035996260032788996,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008261095308625954,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3609.0,
      "completions/mean_length": 703.1707763671875,
      "completions/mean_terminated_length": 629.6658935546875,
      "completions/min_length": 63.0,
      "completions/min_terminated_length": 63.0,
      "epoch": 3.289297171186935,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": -0.0113,
      "num_tokens": 204688061.0,
      "reward": 0.5234375,
      "reward_std": 0.22301450371742249,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 352
    },
    {
      "clip_ratio/high_max": 0.001756315132297459,
      "clip_ratio/high_mean": 0.0005779319496923563,
      "clip_ratio/low_mean": 0.0003970873019625287,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009750192630235688,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3336.0,
      "completions/mean_length": 614.4296875,
      "completions/mean_terminated_length": 547.0955200195312,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 3.298629337999417,
      "grad_norm": 0.15234375,
      "learning_rate": 1e-06,
      "loss": -0.0024,
      "num_tokens": 205262478.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.23830027878284454,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.49702703952789307,
      "step": 353
    },
    {
      "clip_ratio/high_max": 0.0017022573820213438,
      "clip_ratio/high_mean": 0.0005431572393490569,
      "clip_ratio/low_mean": 0.0003468133288606623,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008899705635485589,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3990.0,
      "completions/mean_length": 645.8560791015625,
      "completions/mean_terminated_length": 587.113525390625,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 3.3079615048118987,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0024,
      "num_tokens": 205870645.0,
      "reward": 0.4966517984867096,
      "reward_std": 0.2380392700433731,
      "rewards/verify_math_reward/mean": 0.4966517984867096,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 354
    },
    {
      "clip_ratio/high_max": 0.0016648589744363562,
      "clip_ratio/high_mean": 0.00046666631124026026,
      "clip_ratio/low_mean": 0.0002961184809464612,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007627847935509635,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2953.0,
      "completions/mean_length": 532.0748291015625,
      "completions/mean_terminated_length": 495.9131774902344,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 3.3172936716243804,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0079,
      "num_tokens": 206387888.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.19020302593708038,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 355
    },
    {
      "clip_ratio/high_max": 0.0017283953184232814,
      "clip_ratio/high_mean": 0.0005860750586634822,
      "clip_ratio/low_mean": 0.0003859317884007396,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009720068519527558,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3219.0,
      "completions/mean_length": 619.8225708007812,
      "completions/mean_terminated_length": 568.6444091796875,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 3.326625838436862,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 206980761.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.24964657425880432,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 356
    },
    {
      "clip_ratio/high_max": 0.0013587948415079154,
      "clip_ratio/high_mean": 0.0004088918583420309,
      "clip_ratio/low_mean": 0.00033309950413240585,
      "clip_ratio/low_min": 2.6799684746947605e-05,
      "clip_ratio/region_mean": 0.0007419913554258528,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3829.0,
      "completions/mean_length": 653.4263916015625,
      "completions/mean_terminated_length": 602.742919921875,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 3.335958005249344,
      "grad_norm": 0.11474609375,
      "learning_rate": 1e-06,
      "loss": 0.0071,
      "num_tokens": 207610087.0,
      "reward": 0.4988839626312256,
      "reward_std": 0.2051931917667389,
      "rewards/verify_math_reward/mean": 0.4988839328289032,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 357
    },
    {
      "clip_ratio/high_max": 0.0013561228170146933,
      "clip_ratio/high_mean": 0.0004154040818775684,
      "clip_ratio/low_mean": 0.00034203096652163367,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007574350465802127,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3759.0,
      "completions/mean_length": 605.6194458007812,
      "completions/mean_terminated_length": 546.1918334960938,
      "completions/min_length": 87.0,
      "completions/min_terminated_length": 87.0,
      "epoch": 3.3452901720618256,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.009,
      "num_tokens": 208175050.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.21132118999958038,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 358
    },
    {
      "clip_ratio/high_max": 0.0013102528355375398,
      "clip_ratio/high_mean": 0.0004396332717533369,
      "clip_ratio/low_mean": 0.00024482973651629436,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006844630124760442,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2814.0,
      "completions/mean_length": 674.3873291015625,
      "completions/mean_terminated_length": 592.2685546875,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 3.3546223388743073,
      "grad_norm": 0.11376953125,
      "learning_rate": 1e-06,
      "loss": 0.0072,
      "num_tokens": 208788005.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.203317791223526,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943605065345764,
      "step": 359
    },
    {
      "clip_ratio/high_max": 0.0014575891673302976,
      "clip_ratio/high_mean": 0.000394679750229443,
      "clip_ratio/low_mean": 0.00023881431718564272,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006334940627539254,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4064.0,
      "completions/mean_length": 614.2355346679688,
      "completions/mean_terminated_length": 558.9694213867188,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 3.363954505686789,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": -0.0014,
      "num_tokens": 209371928.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.1863730102777481,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 360
    },
    {
      "clip_ratio/high_max": 0.0019375069632587838,
      "clip_ratio/high_mean": 0.000640851886373639,
      "clip_ratio/low_mean": 0.0002994591994820439,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009403110825587646,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3413.0,
      "completions/mean_length": 596.9989013671875,
      "completions/mean_terminated_length": 529.32763671875,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 3.3732866724992707,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0065,
      "num_tokens": 209937927.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.21286281943321228,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 361
    },
    {
      "clip_ratio/high_max": 0.0016875027740752557,
      "clip_ratio/high_mean": 0.0005038644831074635,
      "clip_ratio/low_mean": 0.0003414921168314322,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008453565860691015,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4068.0,
      "completions/mean_length": 591.1004638671875,
      "completions/mean_terminated_length": 531.4256591796875,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 3.382618839311753,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.0016,
      "num_tokens": 210492265.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.23942752182483673,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973995089530945,
      "step": 362
    },
    {
      "clip_ratio/high_max": 0.001530025368083443,
      "clip_ratio/high_mean": 0.00047298084245994687,
      "clip_ratio/low_mean": 0.00033506173508612846,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008080425795924384,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3205.0,
      "completions/mean_length": 608.78125,
      "completions/mean_terminated_length": 573.39794921875,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 3.3919510061242346,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0033,
      "num_tokens": 211090965.0,
      "reward": 0.4854910969734192,
      "reward_std": 0.20549741387367249,
      "rewards/verify_math_reward/mean": 0.4854910671710968,
      "rewards/verify_math_reward/std": 0.5000686049461365,
      "step": 363
    },
    {
      "clip_ratio/high_max": 0.0013068964781268733,
      "clip_ratio/high_mean": 0.00035726715623241034,
      "clip_ratio/low_mean": 0.0003496256874768733,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007068928348417103,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3769.0,
      "completions/mean_length": 575.950927734375,
      "completions/mean_terminated_length": 532.1988525390625,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 3.4012831729367163,
      "grad_norm": 0.11962890625,
      "learning_rate": 1e-06,
      "loss": -0.004,
      "num_tokens": 211659809.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.20223219692707062,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 364
    },
    {
      "clip_ratio/high_max": 0.0015464695907212445,
      "clip_ratio/high_mean": 0.000393204986494311,
      "clip_ratio/low_mean": 0.00035978218443233345,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007529871804763388,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2180.0,
      "completions/mean_length": 605.3080444335938,
      "completions/mean_terminated_length": 549.9002075195312,
      "completions/min_length": 94.0,
      "completions/min_terminated_length": 94.0,
      "epoch": 3.410615339749198,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": -0.0062,
      "num_tokens": 212229781.0,
      "reward": 0.5390625,
      "reward_std": 0.2008771300315857,
      "rewards/verify_math_reward/mean": 0.5390625,
      "rewards/verify_math_reward/std": 0.4987502098083496,
      "step": 365
    },
    {
      "clip_ratio/high_max": 0.0016385521175834583,
      "clip_ratio/high_mean": 0.0005387498508753197,
      "clip_ratio/low_mean": 0.0003505492582007719,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008892991136235651,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3251.0,
      "completions/mean_length": 626.3426513671875,
      "completions/mean_terminated_length": 591.1375122070312,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 3.41994750656168,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": 0.0031,
      "num_tokens": 212856120.0,
      "reward": 0.5022321939468384,
      "reward_std": 0.2511875033378601,
      "rewards/verify_math_reward/mean": 0.5022321343421936,
      "rewards/verify_math_reward/std": 0.5002742409706116,
      "step": 366
    },
    {
      "clip_ratio/high_max": 0.0014653511052529211,
      "clip_ratio/high_mean": 0.00045386035731098673,
      "clip_ratio/low_mean": 0.0003321712921433573,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007860316327423789,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4017.0,
      "completions/mean_length": 626.8504638671875,
      "completions/mean_terminated_length": 575.7757568359375,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 3.4292796733741615,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0079,
      "num_tokens": 213462738.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.22079278528690338,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943605065345764,
      "step": 367
    },
    {
      "clip_ratio/high_max": 0.002329958844711655,
      "clip_ratio/high_mean": 0.0008108197534966166,
      "clip_ratio/low_mean": 0.00036530438001136645,
      "clip_ratio/low_min": 1.1512249329825863e-05,
      "clip_ratio/region_mean": 0.0011761241366912145,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3308.0,
      "completions/mean_length": 538.3538208007812,
      "completions/mean_terminated_length": 514.3696899414062,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 3.4386118401866432,
      "grad_norm": 0.1455078125,
      "learning_rate": 1e-06,
      "loss": 0.0112,
      "num_tokens": 214004607.0,
      "reward": 0.6439732313156128,
      "reward_std": 0.2364267110824585,
      "rewards/verify_math_reward/mean": 0.6439732313156128,
      "rewards/verify_math_reward/std": 0.47909072041511536,
      "step": 368
    },
    {
      "clip_ratio/high_max": 0.0015956845563778188,
      "clip_ratio/high_mean": 0.0004438259143171308,
      "clip_ratio/low_mean": 0.00036947598960068717,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008133019109664019,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2603.0,
      "completions/mean_length": 623.9017944335938,
      "completions/mean_terminated_length": 540.5714111328125,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 3.447944006999125,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0055,
      "num_tokens": 214577959.0,
      "reward": 0.5011160969734192,
      "reward_std": 0.22281017899513245,
      "rewards/verify_math_reward/mean": 0.5011160969734192,
      "rewards/verify_math_reward/std": 0.5002780556678772,
      "step": 369
    },
    {
      "clip_ratio/high_max": 0.001967671241800417,
      "clip_ratio/high_mean": 0.0005917860780755291,
      "clip_ratio/low_mean": 0.0002516439379860458,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008434300088993041,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4071.0,
      "completions/mean_length": 629.9620971679688,
      "completions/mean_terminated_length": 582.9118041992188,
      "completions/min_length": 86.0,
      "completions/min_terminated_length": 86.0,
      "epoch": 3.457276173811607,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": -0.0158,
      "num_tokens": 215179117.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.2123001217842102,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 370
    },
    {
      "clip_ratio/high_max": 0.001820850586227607,
      "clip_ratio/high_mean": 0.0005464704081532545,
      "clip_ratio/low_mean": 0.00031654952817916637,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008630199354229262,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2399.0,
      "completions/mean_length": 567.0625,
      "completions/mean_terminated_length": 515.1076049804688,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 3.466608340624089,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": -0.0019,
      "num_tokens": 215719165.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.22116298973560333,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 371
    },
    {
      "clip_ratio/high_max": 0.0015019711145214387,
      "clip_ratio/high_mean": 0.0004210207941923727,
      "clip_ratio/low_mean": 0.0004358102618198245,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008568310363443743,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3960.0,
      "completions/mean_length": 615.7645263671875,
      "completions/mean_terminated_length": 556.5097045898438,
      "completions/min_length": 98.0,
      "completions/min_terminated_length": 98.0,
      "epoch": 3.4759405074365706,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0035,
      "num_tokens": 216305442.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.22484782338142395,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943605065345764,
      "step": 372
    },
    {
      "clip_ratio/high_max": 0.0015328749641412287,
      "clip_ratio/high_mean": 0.0004805345110980852,
      "clip_ratio/low_mean": 0.00033163407965730585,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008121686014419538,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2962.0,
      "completions/mean_length": 620.708740234375,
      "completions/mean_terminated_length": 553.4959716796875,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 3.4852726742490523,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0134,
      "num_tokens": 216881333.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.2137625515460968,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 373
    },
    {
      "clip_ratio/high_max": 0.0015800654764461797,
      "clip_ratio/high_mean": 0.00048514291279389,
      "clip_ratio/low_mean": 0.0002821739765295206,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007673168915971473,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3587.0,
      "completions/mean_length": 604.5245971679688,
      "completions/mean_terminated_length": 545.078369140625,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 3.494604841061534,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0043,
      "num_tokens": 217459131.0,
      "reward": 0.5234375,
      "reward_std": 0.20031329989433289,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 374
    },
    {
      "clip_ratio/high_max": 0.00146641343144438,
      "clip_ratio/high_mean": 0.0004182629563729279,
      "clip_ratio/low_mean": 0.0002642707102040731,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006825336649853853,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3947.0,
      "completions/mean_length": 590.1908569335938,
      "completions/mean_terminated_length": 558.6069946289062,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 3.5039370078740157,
      "grad_norm": 0.1064453125,
      "learning_rate": 1e-06,
      "loss": -0.0007,
      "num_tokens": 218033150.0,
      "reward": 0.5078125,
      "reward_std": 0.1702958196401596,
      "rewards/verify_math_reward/mean": 0.5078125,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 375
    },
    {
      "clip_ratio/high_max": 0.0013207476849856903,
      "clip_ratio/high_mean": 0.00044950790788789163,
      "clip_ratio/low_mean": 0.00037110483003743866,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008206127286030096,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4013.0,
      "completions/mean_length": 641.2890625,
      "completions/mean_terminated_length": 602.296875,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 3.5132691746864975,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0065,
      "num_tokens": 218655201.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.24956989288330078,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973994493484497,
      "step": 376
    },
    {
      "clip_ratio/high_max": 0.0015803857331775362,
      "clip_ratio/high_mean": 0.0004891510018296685,
      "clip_ratio/low_mean": 0.0003099080593074177,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007990590806912223,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3329.0,
      "completions/mean_length": 662.146240234375,
      "completions/mean_terminated_length": 579.7337036132812,
      "completions/min_length": 85.0,
      "completions/min_terminated_length": 85.0,
      "epoch": 3.522601341498979,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": -0.0178,
      "num_tokens": 219258764.0,
      "reward": 0.5301339626312256,
      "reward_std": 0.2094796746969223,
      "rewards/verify_math_reward/mean": 0.5301339030265808,
      "rewards/verify_math_reward/std": 0.49936985969543457,
      "step": 377
    },
    {
      "clip_ratio/high_max": 0.0017390277644153684,
      "clip_ratio/high_mean": 0.0004640953416128468,
      "clip_ratio/low_mean": 0.00038881219870745554,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00085290753213485,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4024.0,
      "completions/mean_length": 633.2020263671875,
      "completions/mean_terminated_length": 566.23095703125,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 3.531933508311461,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0114,
      "num_tokens": 219851337.0,
      "reward": 0.486607164144516,
      "reward_std": 0.2292456030845642,
      "rewards/verify_math_reward/mean": 0.4866071343421936,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 378
    },
    {
      "clip_ratio/high_max": 0.0017334263684460893,
      "clip_ratio/high_mean": 0.0005789322231066762,
      "clip_ratio/low_mean": 0.0003493214683203405,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009282536930186325,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3009.0,
      "completions/mean_length": 622.6808471679688,
      "completions/mean_terminated_length": 559.529541015625,
      "completions/min_length": 50.0,
      "completions/min_terminated_length": 50.0,
      "epoch": 3.5412656751239426,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": -0.0052,
      "num_tokens": 220428363.0,
      "reward": 0.5625,
      "reward_std": 0.2352944165468216,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 379
    },
    {
      "clip_ratio/high_max": 0.0017076302010536892,
      "clip_ratio/high_mean": 0.0005019646705477498,
      "clip_ratio/low_mean": 0.00030041423542570556,
      "clip_ratio/low_min": 2.3854961909819394e-05,
      "clip_ratio/region_mean": 0.0008023789068829501,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2646.0,
      "completions/mean_length": 595.015625,
      "completions/mean_terminated_length": 555.5011596679688,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 3.5505978419364244,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0008,
      "num_tokens": 221020617.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.22045326232910156,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 380
    },
    {
      "clip_ratio/high_max": 0.0017493419800302945,
      "clip_ratio/high_mean": 0.0005218662131483143,
      "clip_ratio/low_mean": 0.000359154598868372,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000881020800989063,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3898.0,
      "completions/mean_length": 558.4017944335938,
      "completions/mean_terminated_length": 530.5466918945312,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 3.5599300087489065,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0239,
      "num_tokens": 221575505.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.24036367237567902,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 381
    },
    {
      "clip_ratio/high_max": 0.0020407741503731813,
      "clip_ratio/high_mean": 0.000637064392776665,
      "clip_ratio/low_mean": 0.0004148168359279225,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001051881230523577,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2145.0,
      "completions/mean_length": 565.5245971679688,
      "completions/mean_terminated_length": 537.7255859375,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 3.5692621755613883,
      "grad_norm": 0.19921875,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 222144375.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.257803350687027,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 382
    },
    {
      "clip_ratio/high_max": 0.0013758735576629988,
      "clip_ratio/high_mean": 0.0004422849675620455,
      "clip_ratio/low_mean": 0.00032788527369120857,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007701702406848199,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2782.0,
      "completions/mean_length": 594.3627319335938,
      "completions/mean_terminated_length": 534.7434692382812,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 3.57859434237387,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0094,
      "num_tokens": 222711492.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.22773607075214386,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 383
    },
    {
      "clip_ratio/high_max": 0.0015417123049701331,
      "clip_ratio/high_mean": 0.0005001279358793909,
      "clip_ratio/low_mean": 0.00032574273166119383,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008258706702690688,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3672.0,
      "completions/mean_length": 587.8560791015625,
      "completions/mean_terminated_length": 528.1260375976562,
      "completions/min_length": 30.0,
      "completions/min_terminated_length": 30.0,
      "epoch": 3.5879265091863517,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": 0.0124,
      "num_tokens": 223270291.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.24626091122627258,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 384
    },
    {
      "clip_ratio/high_max": 0.0015141884978220332,
      "clip_ratio/high_mean": 0.00044295220095591503,
      "clip_ratio/low_mean": 0.00036933203512035107,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008122842345983372,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2543.0,
      "completions/mean_length": 574.6607666015625,
      "completions/mean_terminated_length": 510.6363525390625,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 3.5972586759988334,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0034,
      "num_tokens": 223812923.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.1931735873222351,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 385
    },
    {
      "clip_ratio/high_max": 0.0015524083719355986,
      "clip_ratio/high_mean": 0.0005091902276035398,
      "clip_ratio/low_mean": 0.0003068782958735028,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008160685219991137,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3821.0,
      "completions/mean_length": 616.1517944335938,
      "completions/mean_terminated_length": 572.8994750976562,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 3.606590842811315,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0039,
      "num_tokens": 224403227.0,
      "reward": 0.515625,
      "reward_std": 0.22012129426002502,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 386
    },
    {
      "clip_ratio/high_max": 0.0015967811632435769,
      "clip_ratio/high_mean": 0.000492642038352642,
      "clip_ratio/low_mean": 0.00031995178494526044,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008125938293233048,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2418.0,
      "completions/mean_length": 624.2824096679688,
      "completions/mean_terminated_length": 569.1757202148438,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 3.615923009623797,
      "grad_norm": 0.11865234375,
      "learning_rate": 1e-06,
      "loss": -0.0015,
      "num_tokens": 224995624.0,
      "reward": 0.5546875,
      "reward_std": 0.21925367414951324,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 387
    },
    {
      "clip_ratio/high_max": 0.0016285891024381272,
      "clip_ratio/high_mean": 0.0005083081557586411,
      "clip_ratio/low_mean": 0.00033271119912114955,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008410193522649934,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2419.0,
      "completions/mean_length": 615.1964721679688,
      "completions/mean_terminated_length": 555.9319458007812,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 3.625255176436279,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": -0.0039,
      "num_tokens": 225574736.0,
      "reward": 0.5234375,
      "reward_std": 0.23361219465732574,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 388
    },
    {
      "clip_ratio/high_max": 0.0016358569091607933,
      "clip_ratio/high_mean": 0.000530800161186562,
      "clip_ratio/low_mean": 0.0003404246615446027,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008712248186384386,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3656.0,
      "completions/mean_length": 539.0245971679688,
      "completions/mean_terminated_length": 498.87811279296875,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "epoch": 3.6345873432487608,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.014,
      "num_tokens": 226106062.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.22236622869968414,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161845445632935,
      "step": 389
    },
    {
      "clip_ratio/high_max": 0.001320096257586556,
      "clip_ratio/high_mean": 0.0004383282646358566,
      "clip_ratio/low_mean": 0.0002619372946810472,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007002655456744833,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1914.0,
      "completions/mean_length": 628.404052734375,
      "completions/mean_terminated_length": 573.36279296875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 3.6439195100612425,
      "grad_norm": 0.1142578125,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 226692000.0,
      "reward": 0.4754464626312256,
      "reward_std": 0.21357779204845428,
      "rewards/verify_math_reward/mean": 0.4754464328289032,
      "rewards/verify_math_reward/std": 0.4996756613254547,
      "step": 390
    },
    {
      "clip_ratio/high_max": 0.0016108330692077288,
      "clip_ratio/high_mean": 0.0004955009296736534,
      "clip_ratio/low_mean": 0.00030426281182371895,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007997637512744404,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2909.0,
      "completions/mean_length": 626.8326416015625,
      "completions/mean_terminated_length": 551.6738891601562,
      "completions/min_length": 62.0,
      "completions/min_terminated_length": 62.0,
      "epoch": 3.653251676873724,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.012,
      "num_tokens": 227262362.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.2295461893081665,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 391
    },
    {
      "clip_ratio/high_max": 0.0015622963219357189,
      "clip_ratio/high_mean": 0.000444982869339583,
      "clip_ratio/low_mean": 0.00027761796604863775,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00072260084152731,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2569.0,
      "completions/mean_length": 629.1897583007812,
      "completions/mean_terminated_length": 570.1634521484375,
      "completions/min_length": 33.0,
      "completions/min_terminated_length": 33.0,
      "epoch": 3.662583843686206,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0143,
      "num_tokens": 227863660.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.18840178847312927,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 392
    },
    {
      "clip_ratio/high_max": 0.001648791678235284,
      "clip_ratio/high_mean": 0.00042685270636866335,
      "clip_ratio/low_mean": 0.00025807614952100266,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006849288547527976,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3245.0,
      "completions/mean_length": 567.2902221679688,
      "completions/mean_terminated_length": 527.4627685546875,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 3.6719160104986877,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": 0.0124,
      "num_tokens": 228426176.0,
      "reward": 0.621651828289032,
      "reward_std": 0.19114413857460022,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 393
    },
    {
      "clip_ratio/high_max": 0.0016169582395377802,
      "clip_ratio/high_mean": 0.0004791138062500977,
      "clip_ratio/low_mean": 0.00034066148032252386,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000819775290437974,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3942.0,
      "completions/mean_length": 626.880615234375,
      "completions/mean_terminated_length": 575.8063354492188,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 3.6812481773111694,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0062,
      "num_tokens": 229027141.0,
      "reward": 0.5379464626312256,
      "reward_std": 0.21989238262176514,
      "rewards/verify_math_reward/mean": 0.5379464030265808,
      "rewards/verify_math_reward/std": 0.4988364279270172,
      "step": 394
    },
    {
      "clip_ratio/high_max": 0.0017965815895877313,
      "clip_ratio/high_mean": 0.00053020203131382,
      "clip_ratio/low_mean": 0.000366269854453094,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008964718890638324,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3633.0,
      "completions/mean_length": 607.7232666015625,
      "completions/mean_terminated_length": 560.37109375,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 3.690580344123651,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.0107,
      "num_tokens": 229606733.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.20493288338184357,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 395
    },
    {
      "clip_ratio/high_max": 0.0014480613535852171,
      "clip_ratio/high_mean": 0.0004475641899261973,
      "clip_ratio/low_mean": 0.00039229245521710254,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008398566287723952,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3653.0,
      "completions/mean_length": 585.700927734375,
      "completions/mean_terminated_length": 534.0203857421875,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 3.699912510936133,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.002,
      "num_tokens": 230167217.0,
      "reward": 0.566964328289032,
      "reward_std": 0.2270306646823883,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 396
    },
    {
      "clip_ratio/high_max": 0.0014812636945862323,
      "clip_ratio/high_mean": 0.00048173480990953976,
      "clip_ratio/low_mean": 0.0003426109453812387,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008243457532444154,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2302.0,
      "completions/mean_length": 560.0045166015625,
      "completions/mean_terminated_length": 516.0542602539062,
      "completions/min_length": 85.0,
      "completions/min_terminated_length": 85.0,
      "epoch": 3.7092446777486145,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0075,
      "num_tokens": 230710285.0,
      "reward": 0.578125,
      "reward_std": 0.22939532995224,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 397
    },
    {
      "clip_ratio/high_max": 0.0014677215967822121,
      "clip_ratio/high_mean": 0.0004499109904827492,
      "clip_ratio/low_mean": 0.00034677758276302484,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007966885686983005,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2903.0,
      "completions/mean_length": 656.2310791015625,
      "completions/mean_terminated_length": 577.6974487304688,
      "completions/min_length": 69.0,
      "completions/min_terminated_length": 69.0,
      "epoch": 3.7185768445610963,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0014,
      "num_tokens": 231306964.0,
      "reward": 0.494419664144516,
      "reward_std": 0.22244179248809814,
      "rewards/verify_math_reward/mean": 0.4944196343421936,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 398
    },
    {
      "clip_ratio/high_max": 0.001415038406776148,
      "clip_ratio/high_mean": 0.0004363909156381851,
      "clip_ratio/low_mean": 0.00031491438573993946,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007513053005823167,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2049.0,
      "completions/mean_length": 545.9955444335938,
      "completions/mean_terminated_length": 509.97515869140625,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 3.7279090113735784,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0086,
      "num_tokens": 231839504.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.19663412868976593,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 399
    },
    {
      "clip_ratio/high_max": 0.0020534365648927633,
      "clip_ratio/high_mean": 0.0006376208657457028,
      "clip_ratio/low_mean": 0.00036247534490030375,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010000962010963121,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3426.0,
      "completions/mean_length": 612.484375,
      "completions/mean_terminated_length": 577.138671875,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 3.73724117818606,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": 0.0101,
      "num_tokens": 232431410.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.2620042860507965,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935930073261261,
      "step": 400
    },
    {
      "clip_ratio/high_max": 0.0014936348379706033,
      "clip_ratio/high_mean": 0.0003989423350958532,
      "clip_ratio/low_mean": 0.0002663618034830506,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006653041418758221,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3471.0,
      "completions/mean_length": 607.7154541015625,
      "completions/mean_terminated_length": 560.3631591796875,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 3.746573344998542,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0035,
      "num_tokens": 233008435.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.21324008703231812,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 401
    },
    {
      "clip_ratio/high_max": 0.0015859054765314795,
      "clip_ratio/high_mean": 0.0004815630691155093,
      "clip_ratio/low_mean": 0.0003511838276608614,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008327468967763707,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2521.0,
      "completions/mean_length": 609.8292846679688,
      "completions/mean_terminated_length": 578.4223022460938,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 3.7559055118110236,
      "grad_norm": 0.11865234375,
      "learning_rate": 1e-06,
      "loss": 0.0078,
      "num_tokens": 233607274.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.21654202044010162,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 402
    },
    {
      "clip_ratio/high_max": 0.001635469800021383,
      "clip_ratio/high_mean": 0.00048614210186315177,
      "clip_ratio/low_mean": 0.0004072176566296548,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000893359760993917,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3190.0,
      "completions/mean_length": 589.989990234375,
      "completions/mean_terminated_length": 550.4187622070312,
      "completions/min_length": 51.0,
      "completions/min_terminated_length": 51.0,
      "epoch": 3.7652376786235053,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0123,
      "num_tokens": 234175689.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.24494822323322296,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 403
    },
    {
      "clip_ratio/high_max": 0.0014582817402697401,
      "clip_ratio/high_mean": 0.00039351377563434653,
      "clip_ratio/low_mean": 0.0002876310138617555,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006811447938162019,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3502.0,
      "completions/mean_length": 620.8973388671875,
      "completions/mean_terminated_length": 565.7369995117188,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 3.774569845435987,
      "grad_norm": 0.11376953125,
      "learning_rate": 1e-06,
      "loss": -0.0116,
      "num_tokens": 234759253.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.18690545856952667,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 404
    },
    {
      "clip_ratio/high_max": 0.0019488130801619263,
      "clip_ratio/high_mean": 0.0006539549137869471,
      "clip_ratio/low_mean": 0.00035105997562823177,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010050149021481047,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4026.0,
      "completions/mean_length": 625.8449096679688,
      "completions/mean_terminated_length": 566.7616577148438,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 3.783902012248469,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0019,
      "num_tokens": 235359882.0,
      "reward": 0.566964328289032,
      "reward_std": 0.23953881859779358,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 405
    },
    {
      "clip_ratio/high_max": 0.0019392261774555664,
      "clip_ratio/high_mean": 0.0005943076939729508,
      "clip_ratio/low_mean": 0.0002923669904930648,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000886674691173539,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3921.0,
      "completions/mean_length": 620.9442138671875,
      "completions/mean_terminated_length": 549.7015991210938,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 3.793234179060951,
      "grad_norm": 0.1494140625,
      "learning_rate": 1e-06,
      "loss": -0.0238,
      "num_tokens": 235922384.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.2530961036682129,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 406
    },
    {
      "clip_ratio/high_max": 0.001731750751787331,
      "clip_ratio/high_mean": 0.0006225847790801708,
      "clip_ratio/low_mean": 0.00038206763542802946,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010046524057543138,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3661.0,
      "completions/mean_length": 601.0435791015625,
      "completions/mean_terminated_length": 545.5680541992188,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 3.8025663458734327,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0022,
      "num_tokens": 236485407.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.24588504433631897,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321895837783813,
      "step": 407
    },
    {
      "clip_ratio/high_max": 0.0017494614930910757,
      "clip_ratio/high_mean": 0.0005477968547893397,
      "clip_ratio/low_mean": 0.00029243565404613037,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000840232525661122,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3526.0,
      "completions/mean_length": 553.638427734375,
      "completions/mean_terminated_length": 525.7457885742188,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "epoch": 3.8118985126859144,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.0089,
      "num_tokens": 237031867.0,
      "reward": 0.5770089626312256,
      "reward_std": 0.21098490059375763,
      "rewards/verify_math_reward/mean": 0.5770089030265808,
      "rewards/verify_math_reward/std": 0.4943099319934845,
      "step": 408
    },
    {
      "clip_ratio/high_max": 0.001624859107323573,
      "clip_ratio/high_mean": 0.00047867421790215303,
      "clip_ratio/low_mean": 0.0002843144603730252,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000762988667247555,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3868.0,
      "completions/mean_length": 675.4029541015625,
      "completions/mean_terminated_length": 621.1077270507812,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 3.821230679498396,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0004,
      "num_tokens": 237666148.0,
      "reward": 0.4821428656578064,
      "reward_std": 0.22394628822803497,
      "rewards/verify_math_reward/mean": 0.4821428656578064,
      "rewards/verify_math_reward/std": 0.4999600946903229,
      "step": 409
    },
    {
      "clip_ratio/high_max": 0.0013347278263609041,
      "clip_ratio/high_mean": 0.0003814470600218556,
      "clip_ratio/low_mean": 0.00021836383575646323,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0005998109022584686,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3994.0,
      "completions/mean_length": 638.505615234375,
      "completions/mean_terminated_length": 587.6024780273438,
      "completions/min_length": 69.0,
      "completions/min_terminated_length": 69.0,
      "epoch": 3.830562846310878,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.002,
      "num_tokens": 238276521.0,
      "reward": 0.5078125,
      "reward_std": 0.17033155262470245,
      "rewards/verify_math_reward/mean": 0.5078125,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 410
    },
    {
      "clip_ratio/high_max": 0.0016647904694764293,
      "clip_ratio/high_mean": 0.00046309007188938267,
      "clip_ratio/low_mean": 0.0002389593830685044,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007020494558673818,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3988.0,
      "completions/mean_length": 591.6506958007812,
      "completions/mean_terminated_length": 540.0577392578125,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 3.8398950131233596,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0117,
      "num_tokens": 238838208.0,
      "reward": 0.566964328289032,
      "reward_std": 0.18570081889629364,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 411
    },
    {
      "clip_ratio/high_max": 0.001612948665751901,
      "clip_ratio/high_mean": 0.0005077806624740333,
      "clip_ratio/low_mean": 0.000302331301668346,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008101119651655608,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2635.0,
      "completions/mean_length": 601.8460083007812,
      "completions/mean_terminated_length": 546.3832397460938,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 3.8492271799358413,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": 0.0111,
      "num_tokens": 239404694.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.2007237821817398,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 412
    },
    {
      "clip_ratio/high_max": 0.0017465831315348623,
      "clip_ratio/high_mean": 0.0005380814523050503,
      "clip_ratio/low_mean": 0.00022690568289363,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007649871331523173,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3951.0,
      "completions/mean_length": 529.2221069335938,
      "completions/mean_terminated_length": 501.13726806640625,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 3.858559346748323,
      "grad_norm": 0.1435546875,
      "learning_rate": 1e-06,
      "loss": 0.0124,
      "num_tokens": 239929485.0,
      "reward": 0.6540178656578064,
      "reward_std": 0.2108679562807083,
      "rewards/verify_math_reward/mean": 0.6540178656578064,
      "rewards/verify_math_reward/std": 0.4759531021118164,
      "step": 413
    },
    {
      "clip_ratio/high_max": 0.0016045059946918627,
      "clip_ratio/high_mean": 0.00044197996794537175,
      "clip_ratio/low_mean": 0.00037812140465121047,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008201013843063265,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3245.0,
      "completions/mean_length": 589.1328125,
      "completions/mean_terminated_length": 565.4910278320312,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 3.8678915135608047,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0096,
      "num_tokens": 240528444.0,
      "reward": 0.574776828289032,
      "reward_std": 0.22913500666618347,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 414
    },
    {
      "clip_ratio/high_max": 0.0019203094470867654,
      "clip_ratio/high_mean": 0.0005664961618094821,
      "clip_ratio/low_mean": 0.0003205215285788654,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000887017688000924,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2075.0,
      "completions/mean_length": 554.265625,
      "completions/mean_terminated_length": 506.18780517578125,
      "completions/min_length": 61.0,
      "completions/min_terminated_length": 61.0,
      "epoch": 3.8772236803732865,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0008,
      "num_tokens": 241059874.0,
      "reward": 0.629464328289032,
      "reward_std": 0.22195394337177277,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 415
    },
    {
      "clip_ratio/high_max": 0.0016401830325776245,
      "clip_ratio/high_mean": 0.0004495620095212871,
      "clip_ratio/low_mean": 0.0003676475903375831,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000817209598608315,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3368.0,
      "completions/mean_length": 658.3069458007812,
      "completions/mean_terminated_length": 587.830322265625,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 3.886555847185768,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0003,
      "num_tokens": 241672093.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.21293838322162628,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 416
    },
    {
      "clip_ratio/high_max": 0.0015579139508190565,
      "clip_ratio/high_mean": 0.0003916371828154297,
      "clip_ratio/low_mean": 0.0003195531555775233,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007111903305485612,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2643.0,
      "completions/mean_length": 632.763427734375,
      "completions/mean_terminated_length": 557.733154296875,
      "completions/min_length": 47.0,
      "completions/min_terminated_length": 47.0,
      "epoch": 3.8958880139982504,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0119,
      "num_tokens": 242254433.0,
      "reward": 0.543526828289032,
      "reward_std": 0.19971348345279694,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 417
    },
    {
      "clip_ratio/high_max": 0.001751403093294357,
      "clip_ratio/high_mean": 0.0005641086927425931,
      "clip_ratio/low_mean": 0.00022720499475781253,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007913136905699503,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2345.0,
      "completions/mean_length": 567.3192138671875,
      "completions/mean_terminated_length": 527.4921264648438,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 3.905220180810732,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0059,
      "num_tokens": 242813303.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.21053095161914825,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 418
    },
    {
      "clip_ratio/high_max": 0.0013893151572119677,
      "clip_ratio/high_mean": 0.0004139341785958095,
      "clip_ratio/low_mean": 0.0003862174690993925,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008001516480362625,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3950.0,
      "completions/mean_length": 672.1395263671875,
      "completions/mean_terminated_length": 593.9691772460938,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 3.914552347623214,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0163,
      "num_tokens": 243418828.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.24765029549598694,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756911277771,
      "step": 419
    },
    {
      "clip_ratio/high_max": 0.001797820483261603,
      "clip_ratio/high_mean": 0.000599380720359477,
      "clip_ratio/low_mean": 0.0003531840602590819,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009525647956252214,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2685.0,
      "completions/mean_length": 582.4788208007812,
      "completions/mean_terminated_length": 518.5965576171875,
      "completions/min_length": 60.0,
      "completions/min_terminated_length": 60.0,
      "epoch": 3.9238845144356955,
      "grad_norm": 0.146484375,
      "learning_rate": 1e-06,
      "loss": -0.0125,
      "num_tokens": 243963105.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.24521991610527039,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161845445632935,
      "step": 420
    },
    {
      "clip_ratio/high_max": 0.0016470877526444383,
      "clip_ratio/high_mean": 0.0004988932628293696,
      "clip_ratio/low_mean": 0.000288941778762819,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007878350324972416,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3752.0,
      "completions/mean_length": 646.966552734375,
      "completions/mean_terminated_length": 580.2616577148438,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 3.9332166812481772,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0162,
      "num_tokens": 244566419.0,
      "reward": 0.5145089626312256,
      "reward_std": 0.2191769778728485,
      "rewards/verify_math_reward/mean": 0.5145089030265808,
      "rewards/verify_math_reward/std": 0.5000685453414917,
      "step": 421
    },
    {
      "clip_ratio/high_max": 0.0013836159505444812,
      "clip_ratio/high_mean": 0.00041446368686592905,
      "clip_ratio/low_mean": 0.00038482563741126796,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007992893088157871,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2508.0,
      "completions/mean_length": 617.4810791015625,
      "completions/mean_terminated_length": 570.2613525390625,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 3.942548848060659,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0099,
      "num_tokens": 245156538.0,
      "reward": 0.543526828289032,
      "reward_std": 0.2271055281162262,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 422
    },
    {
      "clip_ratio/high_max": 0.0016540226624783827,
      "clip_ratio/high_mean": 0.0005068623531769845,
      "clip_ratio/low_mean": 0.00024002239706533146,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007468847365998954,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2556.0,
      "completions/mean_length": 639.5814819335938,
      "completions/mean_terminated_length": 552.5777587890625,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 3.9518810148731407,
      "grad_norm": 0.11865234375,
      "learning_rate": 1e-06,
      "loss": 0.0021,
      "num_tokens": 245730651.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.186296746134758,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4982811510562897,
      "step": 423
    },
    {
      "clip_ratio/high_max": 0.0014401540483959252,
      "clip_ratio/high_mean": 0.0004272742631883375,
      "clip_ratio/low_mean": 0.00035068887564193574,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007779631423545652,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2381.0,
      "completions/mean_length": 566.4263916015625,
      "completions/mean_terminated_length": 530.61328125,
      "completions/min_length": 64.0,
      "completions/min_terminated_length": 64.0,
      "epoch": 3.961213181685623,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": -0.0025,
      "num_tokens": 246283817.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.24570778012275696,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 424
    },
    {
      "clip_ratio/high_max": 0.0015108439911273308,
      "clip_ratio/high_mean": 0.000489139272644934,
      "clip_ratio/low_mean": 0.00032226459723005974,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000811403877378325,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2270.0,
      "completions/mean_length": 614.3761596679688,
      "completions/mean_terminated_length": 567.1142578125,
      "completions/min_length": 39.0,
      "completions/min_terminated_length": 39.0,
      "epoch": 3.9705453484981046,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0159,
      "num_tokens": 246873738.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.23112311959266663,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606892466545105,
      "step": 425
    },
    {
      "clip_ratio/high_max": 0.0018837725056073396,
      "clip_ratio/high_mean": 0.0005286419341246074,
      "clip_ratio/low_mean": 0.0003960915551033395,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009247334883184521,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3416.0,
      "completions/mean_length": 567.536865234375,
      "completions/mean_terminated_length": 523.6802368164062,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 3.9798775153105863,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 247427475.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.22950975596904755,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200122833252,
      "step": 426
    },
    {
      "clip_ratio/high_max": 0.0016168537194971577,
      "clip_ratio/high_mean": 0.000455863840670645,
      "clip_ratio/low_mean": 0.0003151222116457575,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007709860510658473,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2253.0,
      "completions/mean_length": 644.3873291015625,
      "completions/mean_terminated_length": 545.31689453125,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 3.989209682123068,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0,
      "num_tokens": 247992814.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.21294020116329193,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321892857551575,
      "step": 427
    },
    {
      "clip_ratio/high_max": 0.0016863984574229107,
      "clip_ratio/high_mean": 0.0005149487785729434,
      "clip_ratio/low_mean": 0.00029305350585673295,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008080022853391711,
      "completions/clipped_ratio": 0.011363636363636354,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2076.0,
      "completions/mean_length": 554.9915161132812,
      "completions/mean_terminated_length": 514.2902221679688,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 3.9985418489355498,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0154,
      "num_tokens": 248560892.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.20238234102725983,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 428
    },
    {
      "clip_ratio/high_max": 0.0017036622812156565,
      "clip_ratio/high_mean": 0.000501368228924548,
      "clip_ratio/low_mean": 0.00033213314895874646,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008335013781106682,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2681.0,
      "completions/mean_length": 559.75,
      "completions/mean_terminated_length": 543.8923950195312,
      "completions/min_length": 30.0,
      "completions/min_terminated_length": 30.0,
      "epoch": 4.009332166812482,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0129,
      "num_tokens": 249140140.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.21214744448661804,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 429
    },
    {
      "clip_ratio/high_max": 0.0016583693195570959,
      "clip_ratio/high_mean": 0.00048098201523316675,
      "clip_ratio/low_mean": 0.00031456732381229813,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007955493456393015,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2533.0,
      "completions/mean_length": 568.6473388671875,
      "completions/mean_terminated_length": 548.85302734375,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 4.0186643336249634,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0052,
      "num_tokens": 249713904.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.21654090285301208,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973994791507721,
      "step": 430
    },
    {
      "clip_ratio/high_max": 0.0015662377345506684,
      "clip_ratio/high_mean": 0.00041621800278335286,
      "clip_ratio/low_mean": 0.00026458191166511824,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006807999138800369,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3791.0,
      "completions/mean_length": 606.9453125,
      "completions/mean_terminated_length": 567.5654907226562,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 4.027996500437445,
      "grad_norm": 0.1171875,
      "learning_rate": 1e-06,
      "loss": 0.0143,
      "num_tokens": 250295159.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.19129958748817444,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973995089530945,
      "step": 431
    },
    {
      "clip_ratio/high_max": 0.0017931187894646428,
      "clip_ratio/high_mean": 0.0005656477014781558,
      "clip_ratio/low_mean": 0.00038114246308396105,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009467901654716115,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4039.0,
      "completions/mean_length": 632.5859375,
      "completions/mean_terminated_length": 573.6174926757812,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 4.037328667249927,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0163,
      "num_tokens": 250886500.0,
      "reward": 0.5625,
      "reward_std": 0.24784713983535767,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 432
    },
    {
      "clip_ratio/high_max": 0.0014611402048103628,
      "clip_ratio/high_mean": 0.0005159039941418087,
      "clip_ratio/low_mean": 0.0003199606596808735,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008358646600754582,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2508.0,
      "completions/mean_length": 637.5491333007812,
      "completions/mean_terminated_length": 574.6681518554688,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 4.046660834062409,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": 0.0152,
      "num_tokens": 251477264.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.2152363657951355,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 433
    },
    {
      "clip_ratio/high_max": 0.0018375051686234656,
      "clip_ratio/high_mean": 0.0005550296677938604,
      "clip_ratio/low_mean": 0.00039655115983805445,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009515808333162568,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4068.0,
      "completions/mean_length": 661.5592041015625,
      "completions/mean_terminated_length": 583.147216796875,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 4.05599300087489,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0122,
      "num_tokens": 252087973.0,
      "reward": 0.504464328289032,
      "reward_std": 0.281840443611145,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5002593398094177,
      "step": 434
    },
    {
      "clip_ratio/high_max": 0.002112425703671761,
      "clip_ratio/high_mean": 0.0006604397185583366,
      "clip_ratio/low_mean": 0.00025667889258329524,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009171186156891054,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2235.0,
      "completions/mean_length": 546.5245971679688,
      "completions/mean_terminated_length": 502.40679931640625,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 4.065325167687372,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": -0.0039,
      "num_tokens": 252616603.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.208427295088768,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321895837783813,
      "step": 435
    },
    {
      "clip_ratio/high_max": 0.0015524970385740744,
      "clip_ratio/high_mean": 0.0004217042617256084,
      "clip_ratio/low_mean": 0.00025636546195073606,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000678069733112352,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4004.0,
      "completions/mean_length": 630.3504638671875,
      "completions/mean_terminated_length": 571.3439331054688,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 4.074657334499854,
      "grad_norm": 0.115234375,
      "learning_rate": 1e-06,
      "loss": 0.0052,
      "num_tokens": 253210261.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.18606990575790405,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 436
    },
    {
      "clip_ratio/high_max": 0.0017454762273700908,
      "clip_ratio/high_mean": 0.0005565919273067266,
      "clip_ratio/low_mean": 0.00034766365502036933,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000904255585737701,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3619.0,
      "completions/mean_length": 627.833740234375,
      "completions/mean_terminated_length": 568.7843627929688,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 4.083989501312336,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": -0.0069,
      "num_tokens": 253801016.0,
      "reward": 0.5011160969734192,
      "reward_std": 0.23149968683719635,
      "rewards/verify_math_reward/mean": 0.5011160969734192,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 437
    },
    {
      "clip_ratio/high_max": 0.0015510613402511808,
      "clip_ratio/high_mean": 0.0004516644767136313,
      "clip_ratio/low_mean": 0.000284588589465784,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007362530659520417,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4074.0,
      "completions/mean_length": 638.171875,
      "completions/mean_terminated_length": 599.1444702148438,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 4.093321668124818,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0048,
      "num_tokens": 254413066.0,
      "reward": 0.5,
      "reward_std": 0.226617693901062,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5002792477607727,
      "step": 438
    },
    {
      "clip_ratio/high_max": 0.0015267543431036756,
      "clip_ratio/high_mean": 0.0004476620911191276,
      "clip_ratio/low_mean": 0.00036250121388547996,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008101632975012762,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3829.0,
      "completions/mean_length": 663.5089721679688,
      "completions/mean_terminated_length": 609.0249633789062,
      "completions/min_length": 12.0,
      "completions/min_terminated_length": 12.0,
      "epoch": 4.1026538349373,
      "grad_norm": 0.11767578125,
      "learning_rate": 1e-06,
      "loss": 0.0021,
      "num_tokens": 255040826.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.20516182482242584,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 439
    },
    {
      "clip_ratio/high_max": 0.0015841341746636317,
      "clip_ratio/high_mean": 0.00048040379022040725,
      "clip_ratio/low_mean": 0.00030345916957230656,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007838629612706427,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3889.0,
      "completions/mean_length": 652.515625,
      "completions/mean_terminated_length": 601.8187866210938,
      "completions/min_length": 76.0,
      "completions/min_terminated_length": 76.0,
      "epoch": 4.111986001749782,
      "grad_norm": 0.1162109375,
      "learning_rate": 1e-06,
      "loss": 0.0137,
      "num_tokens": 255654320.0,
      "reward": 0.5078125,
      "reward_std": 0.20636852085590363,
      "rewards/verify_math_reward/mean": 0.5078125,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 440
    },
    {
      "clip_ratio/high_max": 0.001609556543371582,
      "clip_ratio/high_mean": 0.00042774063945216767,
      "clip_ratio/low_mean": 0.0003944202933325869,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008221609241445549,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3666.0,
      "completions/mean_length": 633.2734375,
      "completions/mean_terminated_length": 586.2681274414062,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 4.121318168562263,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0039,
      "num_tokens": 256256301.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.2284938097000122,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 441
    },
    {
      "clip_ratio/high_max": 0.0014061840456633945,
      "clip_ratio/high_mean": 0.0004493843003956499,
      "clip_ratio/low_mean": 0.0002969730242057267,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007463573329005158,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2892.0,
      "completions/mean_length": 641.786865234375,
      "completions/mean_terminated_length": 586.9580688476562,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 4.130650335374745,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0091,
      "num_tokens": 256864294.0,
      "reward": 0.4921875298023224,
      "reward_std": 0.2139505296945572,
      "rewards/verify_math_reward/mean": 0.4921875,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 442
    },
    {
      "clip_ratio/high_max": 0.0016358745269826613,
      "clip_ratio/high_mean": 0.0004645560889002809,
      "clip_ratio/low_mean": 0.00026109794521289587,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007256540357047925,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3155.0,
      "completions/mean_length": 637.7902221679688,
      "completions/mean_terminated_length": 570.9078369140625,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 4.139982502187227,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": -0.0083,
      "num_tokens": 257464482.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.19016912579536438,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689781665802,
      "step": 443
    },
    {
      "clip_ratio/high_max": 0.0016762115683377488,
      "clip_ratio/high_mean": 0.0005131442762831284,
      "clip_ratio/low_mean": 0.0003506496645968582,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008637939467917022,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2173.0,
      "completions/mean_length": 593.4765625,
      "completions/mean_terminated_length": 533.8422241210938,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 4.1493146689997085,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0004,
      "num_tokens": 258031901.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.23262692987918854,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 444
    },
    {
      "clip_ratio/high_max": 0.0014192719418133493,
      "clip_ratio/high_mean": 0.0004401346074018875,
      "clip_ratio/low_mean": 0.0003835150885151961,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008236496951212757,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3408.0,
      "completions/mean_length": 610.4888916015625,
      "completions/mean_terminated_length": 539.0319213867188,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 4.15864683581219,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0029,
      "num_tokens": 258596179.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.22518664598464966,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 445
    },
    {
      "clip_ratio/high_max": 0.0015581911557092099,
      "clip_ratio/high_mean": 0.0004907202585400228,
      "clip_ratio/low_mean": 0.0003781174611958704,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008688377092767041,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2735.0,
      "completions/mean_length": 594.5167846679688,
      "completions/mean_terminated_length": 542.9660034179688,
      "completions/min_length": 71.0,
      "completions/min_terminated_length": 71.0,
      "epoch": 4.167979002624672,
      "grad_norm": 0.1435546875,
      "learning_rate": 1e-06,
      "loss": -0.0041,
      "num_tokens": 259161642.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.24577559530735016,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 446
    },
    {
      "clip_ratio/high_max": 0.0014534153542626882,
      "clip_ratio/high_mean": 0.0004460589500467904,
      "clip_ratio/low_mean": 0.00036021004962094594,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008062690130827832,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3412.0,
      "completions/mean_length": 657.3158569335938,
      "completions/mean_terminated_length": 590.8111572265625,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 4.177311169437154,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0041,
      "num_tokens": 259771373.0,
      "reward": 0.520089328289032,
      "reward_std": 0.22277876734733582,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 447
    },
    {
      "clip_ratio/high_max": 0.0016046481323428452,
      "clip_ratio/high_mean": 0.00048080523583848844,
      "clip_ratio/low_mean": 0.00036281801590121177,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008436232583335368,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3937.0,
      "completions/mean_length": 583.4933471679688,
      "completions/mean_terminated_length": 563.7822875976562,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 4.186643336249635,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0096,
      "num_tokens": 260369807.0,
      "reward": 0.546875,
      "reward_std": 0.21391661465168,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 448
    },
    {
      "clip_ratio/high_max": 0.001517039590908098,
      "clip_ratio/high_mean": 0.0004863342280714278,
      "clip_ratio/low_mean": 0.00039316169159064884,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00087949591306824,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3586.0,
      "completions/mean_length": 651.0714721679688,
      "completions/mean_terminated_length": 568.3931274414062,
      "completions/min_length": 82.0,
      "completions/min_terminated_length": 82.0,
      "epoch": 4.195975503062117,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0118,
      "num_tokens": 260956271.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.24201901257038116,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 449
    },
    {
      "clip_ratio/high_max": 0.0017466909521317575,
      "clip_ratio/high_mean": 0.000555154106223199,
      "clip_ratio/low_mean": 0.00028920324893988436,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008443573506156099,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2951.0,
      "completions/mean_length": 580.5892944335938,
      "completions/mean_terminated_length": 544.919921875,
      "completions/min_length": 17.0,
      "completions/min_terminated_length": 17.0,
      "epoch": 4.205307669874599,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0079,
      "num_tokens": 261527951.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.2472045123577118,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 450
    },
    {
      "clip_ratio/high_max": 0.0015777427634020569,
      "clip_ratio/high_mean": 0.00042024009394481254,
      "clip_ratio/low_mean": 0.00032073673787635926,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007409768413708662,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2481.0,
      "completions/mean_length": 601.8527221679688,
      "completions/mean_terminated_length": 570.3739013671875,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 4.2146398366870805,
      "grad_norm": 0.11328125,
      "learning_rate": 1e-06,
      "loss": 0.0009,
      "num_tokens": 262125963.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.19197037816047668,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 451
    },
    {
      "clip_ratio/high_max": 0.0016140327397806686,
      "clip_ratio/high_mean": 0.0005076759837265854,
      "clip_ratio/low_mean": 0.00027791280172095867,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007855887779442128,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3606.0,
      "completions/mean_length": 600.4765625,
      "completions/mean_terminated_length": 557.0294189453125,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 4.223972003499562,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0008,
      "num_tokens": 262705902.0,
      "reward": 0.543526828289032,
      "reward_std": 0.20700766146183014,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 452
    },
    {
      "clip_ratio/high_max": 0.0014768202390769147,
      "clip_ratio/high_mean": 0.0004388366351122386,
      "clip_ratio/low_mean": 0.00034660389769669564,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007854405412217602,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2417.0,
      "completions/mean_length": 631.904052734375,
      "completions/mean_terminated_length": 592.805908203125,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 4.233304170312044,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": -0.0009,
      "num_tokens": 263325008.0,
      "reward": 0.4888392984867096,
      "reward_std": 0.22417522966861725,
      "rewards/verify_math_reward/mean": 0.4888392984867096,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 453
    },
    {
      "clip_ratio/high_max": 0.0017326985635008896,
      "clip_ratio/high_mean": 0.0005633963983200374,
      "clip_ratio/low_mean": 0.00031264214987913874,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008760385499044787,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3285.0,
      "completions/mean_length": 590.2288208007812,
      "completions/mean_terminated_length": 546.6542358398438,
      "completions/min_length": 76.0,
      "completions/min_terminated_length": 76.0,
      "epoch": 4.242636337124526,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0117,
      "num_tokens": 263905565.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.24048060178756714,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 454
    },
    {
      "clip_ratio/high_max": 0.001486037075665081,
      "clip_ratio/high_mean": 0.000446635130060713,
      "clip_ratio/low_mean": 0.00038580304101287766,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000832438171528338,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2711.0,
      "completions/mean_length": 614.6295166015625,
      "completions/mean_terminated_length": 579.3054809570312,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 4.251968503937007,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.0089,
      "num_tokens": 264504393.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.2015165388584137,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.4907552897930145,
      "step": 455
    },
    {
      "clip_ratio/high_max": 0.0016170304697880056,
      "clip_ratio/high_mean": 0.00039441224362235516,
      "clip_ratio/low_mean": 0.00030087512379850523,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006952873673071736,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2521.0,
      "completions/mean_length": 616.1295166015625,
      "completions/mean_terminated_length": 540.7388916015625,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 4.26130067074949,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": -0.002,
      "num_tokens": 265063453.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.18727383017539978,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 456
    },
    {
      "clip_ratio/high_max": 0.0014758168308617314,
      "clip_ratio/high_mean": 0.0004300506926711023,
      "clip_ratio/low_mean": 0.0003016561261119932,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000731706818442035,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2437.0,
      "completions/mean_length": 639.622802734375,
      "completions/mean_terminated_length": 548.561279296875,
      "completions/min_length": 90.0,
      "completions/min_terminated_length": 90.0,
      "epoch": 4.270632837561972,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.007,
      "num_tokens": 265627979.0,
      "reward": 0.5234375,
      "reward_std": 0.20357809960842133,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 457
    },
    {
      "clip_ratio/high_max": 0.0014188833247317234,
      "clip_ratio/high_mean": 0.00043401154653111007,
      "clip_ratio/low_mean": 0.00031582017356868164,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007498317309000413,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3527.0,
      "completions/mean_length": 641.505615234375,
      "completions/mean_terminated_length": 594.6119995117188,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 4.2799650043744535,
      "grad_norm": 0.1171875,
      "learning_rate": 1e-06,
      "loss": 0.0035,
      "num_tokens": 266239288.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.2029048055410385,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.4999600946903229,
      "step": 458
    },
    {
      "clip_ratio/high_max": 0.001465431610995438,
      "clip_ratio/high_mean": 0.0004236939357724623,
      "clip_ratio/low_mean": 0.00031415979174198583,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007378537129625329,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2443.0,
      "completions/mean_length": 624.091552734375,
      "completions/mean_terminated_length": 556.9442138671875,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 4.289297171186935,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0074,
      "num_tokens": 266817618.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.19892391562461853,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 459
    },
    {
      "clip_ratio/high_max": 0.001607860074727796,
      "clip_ratio/high_mean": 0.0004799057594482292,
      "clip_ratio/low_mean": 0.00028580797902577615,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007657137480236997,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3891.0,
      "completions/mean_length": 656.0881958007812,
      "completions/mean_terminated_length": 601.4863891601562,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 4.298629337999417,
      "grad_norm": 0.115234375,
      "learning_rate": 1e-06,
      "loss": -0.0022,
      "num_tokens": 267448353.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.21452394127845764,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 460
    },
    {
      "clip_ratio/high_max": 0.0017103273767133942,
      "clip_ratio/high_mean": 0.0005531097926905204,
      "clip_ratio/low_mean": 0.0002648757543965985,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008179855553862581,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3580.0,
      "completions/mean_length": 626.6551513671875,
      "completions/mean_terminated_length": 591.4531860351562,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 4.307961504811899,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.0011,
      "num_tokens": 268064324.0,
      "reward": 0.4955357313156128,
      "reward_std": 0.22988249361515045,
      "rewards/verify_math_reward/mean": 0.4955357015132904,
      "rewards/verify_math_reward/std": 0.500259280204773,
      "step": 461
    },
    {
      "clip_ratio/high_max": 0.0013549203795264475,
      "clip_ratio/high_mean": 0.0004196689631044137,
      "clip_ratio/low_mean": 0.0002853072559219072,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007049762270980864,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2004.0,
      "completions/mean_length": 553.388427734375,
      "completions/mean_terminated_length": 533.5084228515625,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 4.31729367162438,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0048,
      "num_tokens": 268634336.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.19009242951869965,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 462
    },
    {
      "clip_ratio/high_max": 0.0016750827362557175,
      "clip_ratio/high_mean": 0.0004493110382099985,
      "clip_ratio/low_mean": 0.00028353875381981197,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007328497913476895,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3642.0,
      "completions/mean_length": 611.6194458007812,
      "completions/mean_terminated_length": 556.3118286132812,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 4.326625838436862,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.003,
      "num_tokens": 269215531.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.21793846786022186,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608329772949,
      "step": 463
    },
    {
      "clip_ratio/high_max": 0.0016607362404101877,
      "clip_ratio/high_mean": 0.0005025412156101083,
      "clip_ratio/low_mean": 0.0003072622860145202,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008098035068542231,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3282.0,
      "completions/mean_length": 623.0814819335938,
      "completions/mean_terminated_length": 571.9512939453125,
      "completions/min_length": 50.0,
      "completions/min_terminated_length": 50.0,
      "epoch": 4.335958005249344,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": 0.0062,
      "num_tokens": 269807380.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.22271278500556946,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 464
    },
    {
      "clip_ratio/high_max": 0.001639027949750016,
      "clip_ratio/high_mean": 0.00046428004679910373,
      "clip_ratio/low_mean": 0.0003137677274480666,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007780477669712127,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3622.0,
      "completions/mean_length": 573.0803833007812,
      "completions/mean_terminated_length": 525.2579345703125,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 4.3452901720618256,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0115,
      "num_tokens": 270358652.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.22149746119976044,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 465
    },
    {
      "clip_ratio/high_max": 0.0015635195068171015,
      "clip_ratio/high_mean": 0.00047154270578175783,
      "clip_ratio/low_mean": 0.00023050625850373763,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007020489679234743,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3958.0,
      "completions/mean_length": 575.216552734375,
      "completions/mean_terminated_length": 539.49267578125,
      "completions/min_length": 97.0,
      "completions/min_terminated_length": 97.0,
      "epoch": 4.354622338874307,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.003,
      "num_tokens": 270924830.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.19557398557662964,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.4846802353858948,
      "step": 466
    },
    {
      "clip_ratio/high_max": 0.0016615856666248874,
      "clip_ratio/high_mean": 0.0004494111750545926,
      "clip_ratio/low_mean": 0.00027698924031938077,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007264004170792759,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1919.0,
      "completions/mean_length": 548.8582763671875,
      "completions/mean_terminated_length": 504.7695007324219,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 4.363954505686789,
      "grad_norm": 0.1484375,
      "learning_rate": 1e-06,
      "loss": 0.0062,
      "num_tokens": 271459015.0,
      "reward": 0.574776828289032,
      "reward_std": 0.18362995982170105,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 467
    },
    {
      "clip_ratio/high_max": 0.0015913287443254376,
      "clip_ratio/high_mean": 0.000531323003997386,
      "clip_ratio/low_mean": 0.0003833229704923724,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000914645991542784,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3699.0,
      "completions/mean_length": 608.2745971679688,
      "completions/mean_terminated_length": 556.9263916015625,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 4.373286672499271,
      "grad_norm": 0.1484375,
      "learning_rate": 1e-06,
      "loss": -0.0092,
      "num_tokens": 272036405.0,
      "reward": 0.606026828289032,
      "reward_std": 0.24202153086662292,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 468
    },
    {
      "clip_ratio/high_max": 0.0015565556441288209,
      "clip_ratio/high_mean": 0.0004704969373960921,
      "clip_ratio/low_mean": 0.000321143754035802,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007916406866570469,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2894.0,
      "completions/mean_length": 648.560302734375,
      "completions/mean_terminated_length": 561.7825927734375,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 4.3826188393117524,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0001,
      "num_tokens": 272617323.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.20662423968315125,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 469
    },
    {
      "clip_ratio/high_max": 0.00128650848637335,
      "clip_ratio/high_mean": 0.00037658480539448647,
      "clip_ratio/low_mean": 0.00037177639205765445,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007483612062060274,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3493.0,
      "completions/mean_length": 630.4342041015625,
      "completions/mean_terminated_length": 595.2705688476562,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 4.391951006124234,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.0072,
      "num_tokens": 273241344.0,
      "reward": 0.5022321939468384,
      "reward_std": 0.1996411234140396,
      "rewards/verify_math_reward/mean": 0.5022321343421936,
      "rewards/verify_math_reward/std": 0.5002743005752563,
      "step": 470
    },
    {
      "clip_ratio/high_max": 0.001751421723383828,
      "clip_ratio/high_mean": 0.0005249783489489346,
      "clip_ratio/low_mean": 0.00033580776062080986,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008607861018390395,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4042.0,
      "completions/mean_length": 652.114990234375,
      "completions/mean_terminated_length": 589.4988403320312,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 4.401283172936716,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0221,
      "num_tokens": 273857735.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.20839229226112366,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 471
    },
    {
      "clip_ratio/high_max": 0.0015658479151170468,
      "clip_ratio/high_mean": 0.0004453192345863499,
      "clip_ratio/low_mean": 0.00031153968529906706,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007568589285256166,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3565.0,
      "completions/mean_length": 588.75,
      "completions/mean_terminated_length": 537.1143798828125,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 4.410615339749198,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.0056,
      "num_tokens": 274423455.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.22500258684158325,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 472
    },
    {
      "clip_ratio/high_max": 0.0016656261632306268,
      "clip_ratio/high_mean": 0.0005626158235827461,
      "clip_ratio/low_mean": 0.0003227071864557729,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008853230140175583,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3935.0,
      "completions/mean_length": 609.8002319335938,
      "completions/mean_terminated_length": 538.3291625976562,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 4.41994750656168,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": -0.0025,
      "num_tokens": 274987932.0,
      "reward": 0.543526828289032,
      "reward_std": 0.23848573863506317,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 473
    },
    {
      "clip_ratio/high_max": 0.001721414178973646,
      "clip_ratio/high_mean": 0.0006090785182095715,
      "clip_ratio/low_mean": 0.00033543772758548585,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009445162511383387,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3845.0,
      "completions/mean_length": 583.8248291015625,
      "completions/mean_terminated_length": 568.0751342773438,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 4.429279673374162,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.0236,
      "num_tokens": 275585623.0,
      "reward": 0.5256696939468384,
      "reward_std": 0.2573162019252777,
      "rewards/verify_math_reward/mean": 0.5256696343421936,
      "rewards/verify_math_reward/std": 0.4996195435523987,
      "step": 474
    },
    {
      "clip_ratio/high_max": 0.0012805054448108422,
      "clip_ratio/high_mean": 0.0003528819356688473,
      "clip_ratio/low_mean": 0.00023715787995115534,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0005900398082303582,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3561.0,
      "completions/mean_length": 630.552490234375,
      "completions/mean_terminated_length": 591.4390869140625,
      "completions/min_length": 98.0,
      "completions/min_terminated_length": 98.0,
      "epoch": 4.438611840186644,
      "grad_norm": 0.1171875,
      "learning_rate": 1e-06,
      "loss": 0.0065,
      "num_tokens": 276200886.0,
      "reward": 0.5100446939468384,
      "reward_std": 0.1762627214193344,
      "rewards/verify_math_reward/mean": 0.5100446343421936,
      "rewards/verify_math_reward/std": 0.5001782774925232,
      "step": 475
    },
    {
      "clip_ratio/high_max": 0.001785341470167623,
      "clip_ratio/high_mean": 0.0005500541788023838,
      "clip_ratio/low_mean": 0.0003359141497867313,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008859683184709866,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2889.0,
      "completions/mean_length": 601.2935791015625,
      "completions/mean_terminated_length": 541.7922973632812,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 4.447944006999125,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0231,
      "num_tokens": 276767213.0,
      "reward": 0.5770089626312256,
      "reward_std": 0.20233887434005737,
      "rewards/verify_math_reward/mean": 0.5770089030265808,
      "rewards/verify_math_reward/std": 0.4943099319934845,
      "step": 476
    },
    {
      "clip_ratio/high_max": 0.0019143885692756157,
      "clip_ratio/high_mean": 0.0005483994686983351,
      "clip_ratio/low_mean": 0.00033269647110500955,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008810959270704188,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3979.0,
      "completions/mean_length": 555.7991333007812,
      "completions/mean_terminated_length": 527.9235229492188,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 4.457276173811607,
      "grad_norm": 0.1513671875,
      "learning_rate": 1e-06,
      "loss": 0.0053,
      "num_tokens": 277322377.0,
      "reward": 0.590401828289032,
      "reward_std": 0.23589354753494263,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 477
    },
    {
      "clip_ratio/high_max": 0.0018167906964663416,
      "clip_ratio/high_mean": 0.0005764182369603077,
      "clip_ratio/low_mean": 0.0003979794187216612,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009743976543177268,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4093.0,
      "completions/mean_length": 617.3538208007812,
      "completions/mean_terminated_length": 570.1323852539062,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 4.466608340624089,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": -0.012,
      "num_tokens": 277913870.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.25103527307510376,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 478
    },
    {
      "clip_ratio/high_max": 0.0016934892501012655,
      "clip_ratio/high_mean": 0.0005674093749803433,
      "clip_ratio/low_mean": 0.0003575389092702608,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009249482809536858,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3373.0,
      "completions/mean_length": 569.872802734375,
      "completions/mean_terminated_length": 542.1080322265625,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 4.475940507436571,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0064,
      "num_tokens": 278486588.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.25153452157974243,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 479
    },
    {
      "clip_ratio/high_max": 0.0013954150272184052,
      "clip_ratio/high_mean": 0.00045888196564192185,
      "clip_ratio/low_mean": 0.00037074485692301096,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000829626822451246,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3491.0,
      "completions/mean_length": 662.6730346679688,
      "completions/mean_terminated_length": 584.2864990234375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 4.485272674249052,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0147,
      "num_tokens": 279092247.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.23863616585731506,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 480
    },
    {
      "clip_ratio/high_max": 0.0017112914138124324,
      "clip_ratio/high_mean": 0.0004472731827718235,
      "clip_ratio/low_mean": 0.00038033553357763594,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008276087160083989,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3956.0,
      "completions/mean_length": 594.5145263671875,
      "completions/mean_terminated_length": 510.4788513183594,
      "completions/min_length": 72.0,
      "completions/min_terminated_length": 72.0,
      "epoch": 4.494604841061534,
      "grad_norm": 0.154296875,
      "learning_rate": 1e-06,
      "loss": -0.0103,
      "num_tokens": 279625692.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.22048968076705933,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 481
    },
    {
      "clip_ratio/high_max": 0.001400075911078602,
      "clip_ratio/high_mean": 0.0003827551252015837,
      "clip_ratio/low_mean": 0.0003327557158172567,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007155108486358586,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3412.0,
      "completions/mean_length": 627.4420166015625,
      "completions/mean_terminated_length": 568.3859252929688,
      "completions/min_length": 97.0,
      "completions/min_terminated_length": 97.0,
      "epoch": 4.503937007874016,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 280218680.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.19652535021305084,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751850962638855,
      "step": 482
    },
    {
      "clip_ratio/high_max": 0.0014981317344791023,
      "clip_ratio/high_mean": 0.0004190168824607099,
      "clip_ratio/low_mean": 0.00031817220553875813,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000737189082428813,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2517.0,
      "completions/mean_length": 571.9866333007812,
      "completions/mean_terminated_length": 507.91363525390625,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 4.5132691746864975,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0009,
      "num_tokens": 280745812.0,
      "reward": 0.6015625,
      "reward_std": 0.1981329768896103,
      "rewards/verify_math_reward/mean": 0.6015625,
      "rewards/verify_math_reward/std": 0.48984986543655396,
      "step": 483
    },
    {
      "clip_ratio/high_max": 0.0018096462326866458,
      "clip_ratio/high_mean": 0.0005806429239783029,
      "clip_ratio/low_mean": 0.0002943162592146109,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008749591661398881,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3756.0,
      "completions/mean_length": 652.5301513671875,
      "completions/mean_terminated_length": 593.9013061523438,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 4.522601341498979,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.0205,
      "num_tokens": 281351335.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.21075664460659027,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.4999600946903229,
      "step": 484
    },
    {
      "clip_ratio/high_max": 0.0016633195700705983,
      "clip_ratio/high_mean": 0.0005281733760966745,
      "clip_ratio/low_mean": 0.00027120233403366,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007993757121766976,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2582.0,
      "completions/mean_length": 588.1317138671875,
      "completions/mean_terminated_length": 552.5388793945312,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "epoch": 4.531933508311461,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0012,
      "num_tokens": 281934317.0,
      "reward": 0.504464328289032,
      "reward_std": 0.20793946087360382,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5002593398094177,
      "step": 485
    },
    {
      "clip_ratio/high_max": 0.0014966129547246965,
      "clip_ratio/high_mean": 0.00048653811109033995,
      "clip_ratio/low_mean": 0.0003384272192761273,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000824965342417272,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2653.0,
      "completions/mean_length": 550.0223388671875,
      "completions/mean_terminated_length": 510.0,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 4.541265675123943,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0098,
      "num_tokens": 282464529.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.215563103556633,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 486
    },
    {
      "clip_ratio/high_max": 0.0014841586762486259,
      "clip_ratio/high_mean": 0.0004888288654001371,
      "clip_ratio/low_mean": 0.00035418342713455786,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000843012291170453,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2152.0,
      "completions/mean_length": 600.4442138671875,
      "completions/mean_terminated_length": 556.9966430664062,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 4.550597841936424,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.0009,
      "num_tokens": 283043479.0,
      "reward": 0.566964328289032,
      "reward_std": 0.23120476305484772,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 487
    },
    {
      "clip_ratio/high_max": 0.0017360884721711045,
      "clip_ratio/high_mean": 0.0005348529837192473,
      "clip_ratio/low_mean": 0.0003435390121921955,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008783919965935638,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3012.0,
      "completions/mean_length": 560.4955444335938,
      "completions/mean_terminated_length": 520.5914306640625,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 4.559930008748906,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0125,
      "num_tokens": 283590619.0,
      "reward": 0.6484375,
      "reward_std": 0.2168864905834198,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 488
    },
    {
      "clip_ratio/high_max": 0.00155533077668224,
      "clip_ratio/high_mean": 0.0004262628510787181,
      "clip_ratio/low_mean": 0.00034666936699068174,
      "clip_ratio/low_min": 1.0991910130542237e-05,
      "clip_ratio/region_mean": 0.0007729322132945526,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3033.0,
      "completions/mean_length": 609.171875,
      "completions/mean_terminated_length": 565.832763671875,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 4.569262175561388,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0032,
      "num_tokens": 284184821.0,
      "reward": 0.5301339626312256,
      "reward_std": 0.22804802656173706,
      "rewards/verify_math_reward/mean": 0.5301339030265808,
      "rewards/verify_math_reward/std": 0.49936985969543457,
      "step": 489
    },
    {
      "clip_ratio/high_max": 0.0016612641138635809,
      "clip_ratio/high_mean": 0.0005550469866193453,
      "clip_ratio/low_mean": 0.000347608870242766,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009026558464029222,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3457.0,
      "completions/mean_length": 593.9285888671875,
      "completions/mean_terminated_length": 574.276123046875,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 4.57859434237387,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0018,
      "num_tokens": 284794053.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.21786358952522278,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 490
    },
    {
      "clip_ratio/high_max": 0.0017183926356665324,
      "clip_ratio/high_mean": 0.0005356732121981622,
      "clip_ratio/low_mean": 0.000266077066271464,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000801750290520431,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1681.0,
      "completions/mean_length": 579.5357666015625,
      "completions/mean_terminated_length": 535.8282470703125,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 4.587926509186351,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0129,
      "num_tokens": 285354101.0,
      "reward": 0.613839328289032,
      "reward_std": 0.22120575606822968,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 491
    },
    {
      "clip_ratio/high_max": 0.0013726251263506128,
      "clip_ratio/high_mean": 0.0004201708183018127,
      "clip_ratio/low_mean": 0.00048207846452896774,
      "clip_ratio/low_min": 1.4420857951336075e-05,
      "clip_ratio/region_mean": 0.0009022492840813356,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3267.0,
      "completions/mean_length": 591.4520263671875,
      "completions/mean_terminated_length": 555.8928833007812,
      "completions/min_length": 98.0,
      "completions/min_terminated_length": 98.0,
      "epoch": 4.597258675998834,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0088,
      "num_tokens": 285935482.0,
      "reward": 0.520089328289032,
      "reward_std": 0.2368699461221695,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 492
    },
    {
      "clip_ratio/high_max": 0.0015729252781966352,
      "clip_ratio/high_mean": 0.0005027093616263301,
      "clip_ratio/low_mean": 0.00034286759728274774,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008455769602733199,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3202.0,
      "completions/mean_length": 573.6484375,
      "completions/mean_terminated_length": 529.8677978515625,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "epoch": 4.606590842811316,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0055,
      "num_tokens": 286490207.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.20546559989452362,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 493
    },
    {
      "clip_ratio/high_max": 0.0014307285273389425,
      "clip_ratio/high_mean": 0.00043732595440815203,
      "clip_ratio/low_mean": 0.00033495401589789253,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007722799809926073,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4067.0,
      "completions/mean_length": 597.2142944335938,
      "completions/mean_terminated_length": 545.7032470703125,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 4.615923009623797,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0097,
      "num_tokens": 287051447.0,
      "reward": 0.566964328289032,
      "reward_std": 0.22206270694732666,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 494
    },
    {
      "clip_ratio/high_max": 0.00183015546099341,
      "clip_ratio/high_mean": 0.0005407442623663883,
      "clip_ratio/low_mean": 0.00035960421735126147,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009003484856293653,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3774.0,
      "completions/mean_length": 565.8092041015625,
      "completions/mean_terminated_length": 525.9650268554688,
      "completions/min_length": 53.0,
      "completions/min_terminated_length": 53.0,
      "epoch": 4.625255176436279,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0117,
      "num_tokens": 287603084.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.25040724873542786,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 495
    },
    {
      "clip_ratio/high_max": 0.0016625006010144716,
      "clip_ratio/high_mean": 0.0004916719269658643,
      "clip_ratio/low_mean": 0.00033617579026667954,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008278477125713835,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3990.0,
      "completions/mean_length": 626.0658569335938,
      "completions/mean_terminated_length": 574.9796142578125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "epoch": 4.634587343248761,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": 0.0108,
      "num_tokens": 288202551.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.211582213640213,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 496
    },
    {
      "clip_ratio/high_max": 0.0015355633304352523,
      "clip_ratio/high_mean": 0.0004849236019026648,
      "clip_ratio/low_mean": 0.0002844143895117668,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007693379930060473,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2231.0,
      "completions/mean_length": 546.359375,
      "completions/mean_terminated_length": 514.380615234375,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 4.6439195100612425,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.002,
      "num_tokens": 288742153.0,
      "reward": 0.606026828289032,
      "reward_std": 0.21060511469841003,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 497
    },
    {
      "clip_ratio/high_max": 0.001776651070031221,
      "clip_ratio/high_mean": 0.0005591528490640485,
      "clip_ratio/low_mean": 0.00031130768547882326,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008704605488674133,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3757.0,
      "completions/mean_length": 655.8214721679688,
      "completions/mean_terminated_length": 581.290771484375,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 4.653251676873724,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0053,
      "num_tokens": 289341481.0,
      "reward": 0.527901828289032,
      "reward_std": 0.21155014634132385,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 498
    },
    {
      "clip_ratio/high_max": 0.0015834210566936235,
      "clip_ratio/high_mean": 0.0005732856652684859,
      "clip_ratio/low_mean": 0.0003561456413763153,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009294313213104033,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3401.0,
      "completions/mean_length": 595.716552734375,
      "completions/mean_terminated_length": 540.156494140625,
      "completions/min_length": 36.0,
      "completions/min_terminated_length": 36.0,
      "epoch": 4.662583843686206,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0103,
      "num_tokens": 289908931.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.24273650348186493,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 499
    },
    {
      "clip_ratio/high_max": 0.0014789197302889079,
      "clip_ratio/high_mean": 0.0004162362445185863,
      "clip_ratio/low_mean": 0.0003480468201360054,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007642830669283285,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3045.0,
      "completions/mean_length": 629.9910888671875,
      "completions/mean_terminated_length": 558.9339599609375,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 4.671916010498688,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0091,
      "num_tokens": 290490819.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.20560871064662933,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 500
    },
    {
      "clip_ratio/high_max": 0.0016843309413161478,
      "clip_ratio/high_mean": 0.0004671865392538166,
      "clip_ratio/low_mean": 0.0002480672757201319,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000715253814632888,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3054.0,
      "completions/mean_length": 627.794677734375,
      "completions/mean_terminated_length": 560.718994140625,
      "completions/min_length": 29.0,
      "completions/min_terminated_length": 29.0,
      "epoch": 4.681248177311169,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0051,
      "num_tokens": 291078891.0,
      "reward": 0.4977678656578064,
      "reward_std": 0.20437045395374298,
      "rewards/verify_math_reward/mean": 0.4977678656578064,
      "rewards/verify_math_reward/std": 0.5002743005752563,
      "step": 501
    },
    {
      "clip_ratio/high_max": 0.0013590634198408225,
      "clip_ratio/high_mean": 0.00048674410629701015,
      "clip_ratio/low_mean": 0.00043030665983678773,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009170507692033425,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3714.0,
      "completions/mean_length": 623.8783569335938,
      "completions/mean_terminated_length": 584.6896362304688,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 4.690580344123651,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0,
      "num_tokens": 291695502.0,
      "reward": 0.494419664144516,
      "reward_std": 0.25100386142730713,
      "rewards/verify_math_reward/mean": 0.4944196343421936,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 502
    },
    {
      "clip_ratio/high_max": 0.0017423807448722073,
      "clip_ratio/high_mean": 0.0005437559307210904,
      "clip_ratio/low_mean": 0.00040957945202535484,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009533353813822032,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3738.0,
      "completions/mean_length": 649.2332763671875,
      "completions/mean_terminated_length": 562.4725341796875,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 4.699912510936133,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": -0.0026,
      "num_tokens": 292281383.0,
      "reward": 0.4988839626312256,
      "reward_std": 0.2477276772260666,
      "rewards/verify_math_reward/mean": 0.4988839328289032,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 503
    },
    {
      "clip_ratio/high_max": 0.0014809887270530453,
      "clip_ratio/high_mean": 0.00043425572494015796,
      "clip_ratio/low_mean": 0.00035411961005138437,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000788375347838155,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2632.0,
      "completions/mean_length": 643.568115234375,
      "completions/mean_terminated_length": 588.767578125,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 4.7092446777486145,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0139,
      "num_tokens": 292892268.0,
      "reward": 0.53125,
      "reward_std": 0.21447932720184326,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 504
    },
    {
      "clip_ratio/high_max": 0.0012838945476687513,
      "clip_ratio/high_mean": 0.0003630980708067,
      "clip_ratio/low_mean": 0.0003246117719299946,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006877098348923028,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3031.0,
      "completions/mean_length": 635.3292846679688,
      "completions/mean_terminated_length": 568.3992919921875,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 4.718576844561096,
      "grad_norm": 0.11328125,
      "learning_rate": 1e-06,
      "loss": 0.001,
      "num_tokens": 293486659.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.19125540554523468,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 505
    },
    {
      "clip_ratio/high_max": 0.0013454458530759439,
      "clip_ratio/high_mean": 0.00038541975618500146,
      "clip_ratio/low_mean": 0.000398620349869816,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007840401012799703,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3845.0,
      "completions/mean_length": 625.091552734375,
      "completions/mean_terminated_length": 569.9977416992188,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 4.727909011373578,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.0054,
      "num_tokens": 294078909.0,
      "reward": 0.5078125,
      "reward_std": 0.1907668560743332,
      "rewards/verify_math_reward/mean": 0.5078125,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 506
    },
    {
      "clip_ratio/high_max": 0.0018093389862769982,
      "clip_ratio/high_mean": 0.000492179358161593,
      "clip_ratio/low_mean": 0.0003963819007140046,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008885612569429213,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2044.0,
      "completions/mean_length": 538.8717041015625,
      "completions/mean_terminated_length": 506.8254699707031,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 4.73724117818606,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0086,
      "num_tokens": 294627082.0,
      "reward": 0.5859375,
      "reward_std": 0.20790556073188782,
      "rewards/verify_math_reward/mean": 0.5859375,
      "rewards/verify_math_reward/std": 0.4928344786167145,
      "step": 507
    },
    {
      "clip_ratio/high_max": 0.0013911390997236595,
      "clip_ratio/high_mean": 0.00044665890368378314,
      "clip_ratio/low_mean": 0.0003579561971491785,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008046150978771038,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3191.0,
      "completions/mean_length": 673.943115234375,
      "completions/mean_terminated_length": 607.7599487304688,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 4.746573344998541,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0087,
      "num_tokens": 295256015.0,
      "reward": 0.4921875298023224,
      "reward_std": 0.24217379093170166,
      "rewards/verify_math_reward/mean": 0.4921875,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 508
    },
    {
      "clip_ratio/high_max": 0.0017413587320334045,
      "clip_ratio/high_mean": 0.0006114172979323484,
      "clip_ratio/low_mean": 0.0003403451073609176,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009517624075670028,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2225.0,
      "completions/mean_length": 603.8136596679688,
      "completions/mean_terminated_length": 520.0011596679688,
      "completions/min_length": 87.0,
      "completions/min_terminated_length": 87.0,
      "epoch": 4.755905511811024,
      "grad_norm": 0.1494140625,
      "learning_rate": 1e-06,
      "loss": -0.0016,
      "num_tokens": 295804640.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.23353983461856842,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 509
    },
    {
      "clip_ratio/high_max": 0.0019195449704056955,
      "clip_ratio/high_mean": 0.0006316532999335323,
      "clip_ratio/low_mean": 0.0003624669336659281,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009941202315530973,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3821.0,
      "completions/mean_length": 598.1752319335938,
      "completions/mean_terminated_length": 570.63330078125,
      "completions/min_length": 54.0,
      "completions/min_terminated_length": 54.0,
      "epoch": 4.765237678623506,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0134,
      "num_tokens": 296396533.0,
      "reward": 0.6171875,
      "reward_std": 0.24901039898395538,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 510
    },
    {
      "clip_ratio/high_max": 0.0014497905503958464,
      "clip_ratio/high_mean": 0.0004942071557252348,
      "clip_ratio/low_mean": 0.00029100762981215667,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007852147855373914,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4041.0,
      "completions/mean_length": 627.5178833007812,
      "completions/mean_terminated_length": 608.0538940429688,
      "completions/min_length": 68.0,
      "completions/min_terminated_length": 68.0,
      "epoch": 4.7745698454359875,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.023,
      "num_tokens": 297041293.0,
      "reward": 0.5301339626312256,
      "reward_std": 0.24089357256889343,
      "rewards/verify_math_reward/mean": 0.5301339030265808,
      "rewards/verify_math_reward/std": 0.49936988949775696,
      "step": 511
    },
    {
      "clip_ratio/high_max": 0.001688839212874882,
      "clip_ratio/high_mean": 0.0005524357045487704,
      "clip_ratio/low_mean": 0.0003401884688400969,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008926241744120489,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1510.0,
      "completions/mean_length": 544.1350708007812,
      "completions/mean_terminated_length": 483.66064453125,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 4.783902012248469,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": -0.0101,
      "num_tokens": 297552446.0,
      "reward": 0.574776828289032,
      "reward_std": 0.21643507480621338,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 512
    },
    {
      "clip_ratio/high_max": 0.00187992444352858,
      "clip_ratio/high_mean": 0.0005772723015979864,
      "clip_ratio/low_mean": 0.00031451219206246606,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008917844893403526,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3439.0,
      "completions/mean_length": 601.372802734375,
      "completions/mean_terminated_length": 553.9343872070312,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "epoch": 4.793234179060951,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.001,
      "num_tokens": 298134436.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.19997744262218475,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689778685569763,
      "step": 513
    },
    {
      "clip_ratio/high_max": 0.0016608175455985474,
      "clip_ratio/high_mean": 0.0005447676526273426,
      "clip_ratio/low_mean": 0.00028974903091238957,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008345166697836248,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3857.0,
      "completions/mean_length": 622.1350708007812,
      "completions/mean_terminated_length": 554.949951171875,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 4.802566345873433,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": -0.0025,
      "num_tokens": 298703125.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.22202809154987335,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763562679291,
      "step": 514
    },
    {
      "clip_ratio/high_max": 0.0018567469451227225,
      "clip_ratio/high_mean": 0.0005445421916192572,
      "clip_ratio/low_mean": 0.0003416407251961573,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008861829123816278,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3681.0,
      "completions/mean_length": 657.4140625,
      "completions/mean_terminated_length": 598.8683471679688,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 4.811898512685914,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": -0.0001,
      "num_tokens": 299327504.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.2286778688430786,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 515
    },
    {
      "clip_ratio/high_max": 0.0014091908105910989,
      "clip_ratio/high_mean": 0.0004355001061639996,
      "clip_ratio/low_mean": 0.00039597377326572314,
      "clip_ratio/low_min": 1.1918383279407863e-05,
      "clip_ratio/region_mean": 0.0008314738761328044,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3676.0,
      "completions/mean_length": 612.536865234375,
      "completions/mean_terminated_length": 557.2437744140625,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 4.821230679498396,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0131,
      "num_tokens": 299913089.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.22579282522201538,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 516
    },
    {
      "clip_ratio/high_max": 0.0017064566127373837,
      "clip_ratio/high_mean": 0.0005202652157549892,
      "clip_ratio/low_mean": 0.0003574382284341482,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008777034604463552,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2629.0,
      "completions/mean_length": 614.1373291015625,
      "completions/mean_terminated_length": 562.8754272460938,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 4.830562846310878,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": -0.0048,
      "num_tokens": 300498980.0,
      "reward": 0.5,
      "reward_std": 0.2103448063135147,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5002792477607727,
      "step": 517
    },
    {
      "clip_ratio/high_max": 0.0016321075872838264,
      "clip_ratio/high_mean": 0.00046111401070447755,
      "clip_ratio/low_mean": 0.0002837311283201416,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007448451387972455,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3347.0,
      "completions/mean_length": 645.513427734375,
      "completions/mean_terminated_length": 594.7134399414062,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 4.83989501312336,
      "grad_norm": 0.11376953125,
      "learning_rate": 1e-06,
      "loss": 0.0133,
      "num_tokens": 301106424.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.1872738152742386,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 518
    },
    {
      "clip_ratio/high_max": 0.001791798085832852,
      "clip_ratio/high_mean": 0.0005298276229268595,
      "clip_ratio/low_mean": 0.0003157693417961127,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008455969691567589,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3740.0,
      "completions/mean_length": 628.6261596679688,
      "completions/mean_terminated_length": 577.5775756835938,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 4.849227179935841,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0168,
      "num_tokens": 301699529.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.2000548094511032,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 519
    },
    {
      "clip_ratio/high_max": 0.0015914729137875838,
      "clip_ratio/high_mean": 0.0005042617715389497,
      "clip_ratio/low_mean": 0.00036590810123016126,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008701698752702214,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3564.0,
      "completions/mean_length": 654.6295166015625,
      "completions/mean_terminated_length": 576.059326171875,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 4.858559346748323,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0047,
      "num_tokens": 302291989.0,
      "reward": 0.4955357313156128,
      "reward_std": 0.21714931726455688,
      "rewards/verify_math_reward/mean": 0.4955357015132904,
      "rewards/verify_math_reward/std": 0.500259280204773,
      "step": 520
    },
    {
      "clip_ratio/high_max": 0.0016609390340818209,
      "clip_ratio/high_mean": 0.000485138586782341,
      "clip_ratio/low_mean": 0.0002607448440130611,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007458834279532311,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4060.0,
      "completions/mean_length": 638.8035888671875,
      "completions/mean_terminated_length": 583.9274291992188,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 4.867891513560805,
      "grad_norm": 0.10986328125,
      "learning_rate": 1e-06,
      "loss": 0.0137,
      "num_tokens": 302899901.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.19189411401748657,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 521
    },
    {
      "clip_ratio/high_max": 0.0016318977886840003,
      "clip_ratio/high_mean": 0.0004224451587333533,
      "clip_ratio/low_mean": 0.00042914150799333584,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008515866561538132,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3058.0,
      "completions/mean_length": 588.3939819335938,
      "completions/mean_terminated_length": 516.4840698242188,
      "completions/min_length": 73.0,
      "completions/min_terminated_length": 73.0,
      "epoch": 4.8772236803732865,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": -0.0199,
      "num_tokens": 303438342.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.19746080040931702,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 522
    },
    {
      "clip_ratio/high_max": 0.0017518207860121038,
      "clip_ratio/high_mean": 0.0005529486968498531,
      "clip_ratio/low_mean": 0.00034203485483885743,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008949835546445684,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1923.0,
      "completions/mean_length": 580.65625,
      "completions/mean_terminated_length": 540.9796752929688,
      "completions/min_length": 56.0,
      "completions/min_terminated_length": 56.0,
      "epoch": 4.886555847185768,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0034,
      "num_tokens": 304014050.0,
      "reward": 0.4642857313156128,
      "reward_std": 0.2295461893081665,
      "rewards/verify_math_reward/mean": 0.4642857015132904,
      "rewards/verify_math_reward/std": 0.4990013837814331,
      "step": 523
    },
    {
      "clip_ratio/high_max": 0.001817819624193362,
      "clip_ratio/high_mean": 0.000533803677853939,
      "clip_ratio/low_mean": 0.0002993256985064363,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008331293784067384,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3766.0,
      "completions/mean_length": 565.1373291015625,
      "completions/mean_terminated_length": 521.2508544921875,
      "completions/min_length": 72.0,
      "completions/min_terminated_length": 72.0,
      "epoch": 4.89588801399825,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0039,
      "num_tokens": 304566661.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.18979115784168243,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 524
    },
    {
      "clip_ratio/high_max": 0.0014290894741861848,
      "clip_ratio/high_mean": 0.00046198587665458035,
      "clip_ratio/low_mean": 0.0002933630476036342,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007553489303973038,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2825.0,
      "completions/mean_length": 655.3370971679688,
      "completions/mean_terminated_length": 592.779541015625,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 4.905220180810732,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0062,
      "num_tokens": 305184483.0,
      "reward": 0.4732142984867096,
      "reward_std": 0.22695286571979523,
      "rewards/verify_math_reward/mean": 0.4732142984867096,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 525
    },
    {
      "clip_ratio/high_max": 0.0013120018265908584,
      "clip_ratio/high_mean": 0.00043393694613769185,
      "clip_ratio/low_mean": 0.0002596845182551988,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006936214717825351,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3692.0,
      "completions/mean_length": 673.5267944335938,
      "completions/mean_terminated_length": 579.3302612304688,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 4.914552347623214,
      "grad_norm": 0.1162109375,
      "learning_rate": 1e-06,
      "loss": 0.0022,
      "num_tokens": 305781475.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.19933508336544037,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 526
    },
    {
      "clip_ratio/high_max": 0.001622979414605652,
      "clip_ratio/high_mean": 0.00045199954479357984,
      "clip_ratio/low_mean": 0.0003581312797678038,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008101308358163806,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2902.0,
      "completions/mean_length": 603.1953125,
      "completions/mean_terminated_length": 535.6439208984375,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 4.923884514435695,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0,
      "num_tokens": 306341770.0,
      "reward": 0.512276828289032,
      "reward_std": 0.19791541993618011,
      "rewards/verify_math_reward/mean": 0.5122767686843872,
      "rewards/verify_math_reward/std": 0.500128448009491,
      "step": 527
    },
    {
      "clip_ratio/high_max": 0.0017586277590453392,
      "clip_ratio/high_mean": 0.0005600705312645005,
      "clip_ratio/low_mean": 0.00034087833500962006,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009009488585434156,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3627.0,
      "completions/mean_length": 626.2020263671875,
      "completions/mean_terminated_length": 559.0955200195312,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 4.933216681248178,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0066,
      "num_tokens": 306922247.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.2231585681438446,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 528
    },
    {
      "clip_ratio/high_max": 0.0018863202985812677,
      "clip_ratio/high_mean": 0.0005989270262034552,
      "clip_ratio/low_mean": 0.0003069568422233715,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009058838868440944,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2550.0,
      "completions/mean_length": 568.3482666015625,
      "completions/mean_terminated_length": 540.5714721679688,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "epoch": 4.942548848060659,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.0069,
      "num_tokens": 307482927.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.24495214223861694,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 529
    },
    {
      "clip_ratio/high_max": 0.0015429682252943167,
      "clip_ratio/high_mean": 0.00048443989123825304,
      "clip_ratio/low_mean": 0.0004059024936395872,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008903423749870853,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2187.0,
      "completions/mean_length": 613.6752319335938,
      "completions/mean_terminated_length": 562.4065551757812,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 4.951881014873141,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0096,
      "num_tokens": 308069340.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.2319568395614624,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 530
    },
    {
      "clip_ratio/high_max": 0.0013118975239194697,
      "clip_ratio/high_mean": 0.0003627593423516373,
      "clip_ratio/low_mean": 0.0002987153034155199,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006614746437207941,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3686.0,
      "completions/mean_length": 582.3114013671875,
      "completions/mean_terminated_length": 526.53857421875,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 4.961213181685623,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.0084,
      "num_tokens": 308617995.0,
      "reward": 0.6484375,
      "reward_std": 0.1827705055475235,
      "rewards/verify_math_reward/mean": 0.6484375,
      "rewards/verify_math_reward/std": 0.4777248501777649,
      "step": 531
    },
    {
      "clip_ratio/high_max": 0.0015116052236407995,
      "clip_ratio/high_mean": 0.00044818270077939815,
      "clip_ratio/low_mean": 0.00041221798005608434,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008604006825407851,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2991.0,
      "completions/mean_length": 592.599365234375,
      "completions/mean_terminated_length": 565.0135498046875,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 4.970545348498105,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0136,
      "num_tokens": 309199980.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.22398200631141663,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 532
    },
    {
      "clip_ratio/high_max": 0.0017853919598564971,
      "clip_ratio/high_mean": 0.0004675755010339344,
      "clip_ratio/low_mean": 0.0002622930529696532,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007298685522982851,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2747.0,
      "completions/mean_length": 624.0546875,
      "completions/mean_terminated_length": 552.8758544921875,
      "completions/min_length": 42.0,
      "completions/min_terminated_length": 42.0,
      "epoch": 4.979877515310586,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0118,
      "num_tokens": 309778109.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.18701308965682983,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763562679291,
      "step": 533
    },
    {
      "clip_ratio/high_max": 0.001457198579373653,
      "clip_ratio/high_mean": 0.00040370797762534494,
      "clip_ratio/low_mean": 0.0003826118379492982,
      "clip_ratio/low_min": 1.0232482054561842e-05,
      "clip_ratio/region_mean": 0.0007863198206905508,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4014.0,
      "completions/mean_length": 654.2199096679688,
      "completions/mean_terminated_length": 567.5846557617188,
      "completions/min_length": 64.0,
      "completions/min_terminated_length": 64.0,
      "epoch": 4.989209682123068,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0015,
      "num_tokens": 310366386.0,
      "reward": 0.5022321939468384,
      "reward_std": 0.22303980588912964,
      "rewards/verify_math_reward/mean": 0.5022321343421936,
      "rewards/verify_math_reward/std": 0.5002742409706116,
      "step": 534
    },
    {
      "clip_ratio/high_max": 0.001458971493775607,
      "clip_ratio/high_mean": 0.00043968069712718716,
      "clip_ratio/low_mean": 0.00028276925286263577,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007224499568110332,
      "completions/clipped_ratio": 0.014204545454545414,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1857.0,
      "completions/mean_length": 556.3863525390625,
      "completions/mean_terminated_length": 505.3832702636719,
      "completions/min_length": 94.0,
      "completions/min_terminated_length": 94.0,
      "epoch": 4.99854184893555,
      "grad_norm": 0.1162109375,
      "learning_rate": 1e-06,
      "loss": 0.0132,
      "num_tokens": 310945778.0,
      "reward": 0.5703125,
      "reward_std": 0.17379067838191986,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 535
    },
    {
      "clip_ratio/high_max": 0.0017452636657253606,
      "clip_ratio/high_mean": 0.0005312801556556224,
      "clip_ratio/low_mean": 0.0003431514528529078,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008744316155571141,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3998.0,
      "completions/mean_length": 594.6629638671875,
      "completions/mean_terminated_length": 559.1364135742188,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 5.009332166812482,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0051,
      "num_tokens": 311519324.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.2122662216424942,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 536
    },
    {
      "clip_ratio/high_max": 0.0015920778469080688,
      "clip_ratio/high_mean": 0.0004222492743792827,
      "clip_ratio/low_mean": 0.00038444910774160235,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008066983773460379,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3801.0,
      "completions/mean_length": 639.763427734375,
      "completions/mean_terminated_length": 572.919189453125,
      "completions/min_length": 67.0,
      "completions/min_terminated_length": 67.0,
      "epoch": 5.0186643336249634,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0025,
      "num_tokens": 312104640.0,
      "reward": 0.4966517984867096,
      "reward_std": 0.21425220370292664,
      "rewards/verify_math_reward/mean": 0.4966517984867096,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 537
    },
    {
      "clip_ratio/high_max": 0.0012568632373586297,
      "clip_ratio/high_mean": 0.00036879289780245017,
      "clip_ratio/low_mean": 0.0003587537908060767,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007275466850842349,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4034.0,
      "completions/mean_length": 708.7288208007812,
      "completions/mean_terminated_length": 607.5,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 5.027996500437445,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0182,
      "num_tokens": 312723525.0,
      "reward": 0.5078125,
      "reward_std": 0.23345358669757843,
      "rewards/verify_math_reward/mean": 0.5078125,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 538
    },
    {
      "clip_ratio/high_max": 0.0016520366107215523,
      "clip_ratio/high_mean": 0.0005098993252659056,
      "clip_ratio/low_mean": 0.000413374274103262,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009232736074409331,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4070.0,
      "completions/mean_length": 638.755615234375,
      "completions/mean_terminated_length": 543.60205078125,
      "completions/min_length": 51.0,
      "completions/min_terminated_length": 51.0,
      "epoch": 5.037328667249927,
      "grad_norm": 0.146484375,
      "learning_rate": 1e-06,
      "loss": 0.0087,
      "num_tokens": 313290154.0,
      "reward": 0.5301339626312256,
      "reward_std": 0.23131422698497772,
      "rewards/verify_math_reward/mean": 0.5301339030265808,
      "rewards/verify_math_reward/std": 0.49936985969543457,
      "step": 539
    },
    {
      "clip_ratio/high_max": 0.0018554929847596213,
      "clip_ratio/high_mean": 0.0006344309333599085,
      "clip_ratio/low_mean": 0.000356605803972343,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009910367325574043,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2387.0,
      "completions/mean_length": 603.2064819335938,
      "completions/mean_terminated_length": 547.7653198242188,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 5.046660834062409,
      "grad_norm": 0.146484375,
      "learning_rate": 1e-06,
      "loss": 0.008,
      "num_tokens": 313856395.0,
      "reward": 0.59375,
      "reward_std": 0.25690361857414246,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 540
    },
    {
      "clip_ratio/high_max": 0.0020592304954334395,
      "clip_ratio/high_mean": 0.0006356832564051729,
      "clip_ratio/low_mean": 0.00038154293611114554,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010172261918341974,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4073.0,
      "completions/mean_length": 584.7109375,
      "completions/mean_terminated_length": 541.0678100585938,
      "completions/min_length": 75.0,
      "completions/min_terminated_length": 75.0,
      "epoch": 5.05599300087489,
      "grad_norm": 0.1455078125,
      "learning_rate": 1e-06,
      "loss": 0.0037,
      "num_tokens": 314418168.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.2286132574081421,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 541
    },
    {
      "clip_ratio/high_max": 0.0015770561394674587,
      "clip_ratio/high_mean": 0.0004937669634728081,
      "clip_ratio/low_mean": 0.0003279400937117316,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008217070571845397,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2069.0,
      "completions/mean_length": 591.0022583007812,
      "completions/mean_terminated_length": 543.423095703125,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 5.065325167687372,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 314981090.0,
      "reward": 0.535714328289032,
      "reward_std": 0.23198752105236053,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 542
    },
    {
      "clip_ratio/high_max": 0.0014711573840031633,
      "clip_ratio/high_mean": 0.00045135759103231976,
      "clip_ratio/low_mean": 0.00039902060279928264,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008503782141815464,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3578.0,
      "completions/mean_length": 591.724365234375,
      "completions/mean_terminated_length": 552.1727294921875,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 5.074657334499854,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0139,
      "num_tokens": 315557763.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.23833489418029785,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 543
    },
    {
      "clip_ratio/high_max": 0.0013870163129467983,
      "clip_ratio/high_mean": 0.0003142091329664254,
      "clip_ratio/low_mean": 0.00029854387867089827,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006127530073172238,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3082.0,
      "completions/mean_length": 593.5324096679688,
      "completions/mean_terminated_length": 557.9943237304688,
      "completions/min_length": 13.0,
      "completions/min_terminated_length": 13.0,
      "epoch": 5.083989501312336,
      "grad_norm": 0.11279296875,
      "learning_rate": 1e-06,
      "loss": -0.0015,
      "num_tokens": 316146208.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.17405030131340027,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 544
    },
    {
      "clip_ratio/high_max": 0.0009673911044956185,
      "clip_ratio/high_mean": 0.0002331657036620527,
      "clip_ratio/low_mean": 0.0002735016506676402,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0005066673679721134,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3570.0,
      "completions/mean_length": 591.6473388671875,
      "completions/mean_terminated_length": 568.0224609375,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 5.093321668124818,
      "grad_norm": 0.1064453125,
      "learning_rate": 1e-06,
      "loss": 0.0037,
      "num_tokens": 316751004.0,
      "reward": 0.4464285969734192,
      "reward_std": 0.15596871078014374,
      "rewards/verify_math_reward/mean": 0.4464285671710968,
      "rewards/verify_math_reward/std": 0.4973995089530945,
      "step": 545
    },
    {
      "clip_ratio/high_max": 0.0015565626936222543,
      "clip_ratio/high_mean": 0.00043005349834857043,
      "clip_ratio/low_mean": 0.00026414494732307503,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006941984429431614,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3617.0,
      "completions/mean_length": 588.6864013671875,
      "completions/mean_terminated_length": 528.9705200195312,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 5.1026538349373,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": -0.0097,
      "num_tokens": 317299995.0,
      "reward": 0.5770089626312256,
      "reward_std": 0.19387967884540558,
      "rewards/verify_math_reward/mean": 0.5770089030265808,
      "rewards/verify_math_reward/std": 0.4943099617958069,
      "step": 546
    },
    {
      "clip_ratio/high_max": 0.0015786259682499804,
      "clip_ratio/high_mean": 0.0005050115909170927,
      "clip_ratio/low_mean": 0.00023925772779875842,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007442693167831749,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4042.0,
      "completions/mean_length": 591.5971069335938,
      "completions/mean_terminated_length": 531.9307861328125,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 5.111986001749782,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": -0.0068,
      "num_tokens": 317858642.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.18228377401828766,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761127948761,
      "step": 547
    },
    {
      "clip_ratio/high_max": 0.0016671581252012402,
      "clip_ratio/high_mean": 0.0005753812163220573,
      "clip_ratio/low_mean": 0.00031317391835727904,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008885551487765042,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1895.0,
      "completions/mean_length": 567.0658569335938,
      "completions/mean_terminated_length": 535.273681640625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 5.121318168562263,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0003,
      "num_tokens": 318415933.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.22492702305316925,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 548
    },
    {
      "clip_ratio/high_max": 0.001725742742564762,
      "clip_ratio/high_mean": 0.000571799853560151,
      "clip_ratio/low_mean": 0.000325643508858775,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008974433749244781,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3755.0,
      "completions/mean_length": 642.2277221679688,
      "completions/mean_terminated_length": 595.3439331054688,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 5.130650335374745,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0004,
      "num_tokens": 319033209.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.22974373400211334,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 549
    },
    {
      "clip_ratio/high_max": 0.0011382587290427182,
      "clip_ratio/high_mean": 0.00030806649988335266,
      "clip_ratio/low_mean": 0.0002933993080205255,
      "clip_ratio/low_min": 1.544735459901858e-05,
      "clip_ratio/region_mean": 0.0006014658119966043,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4047.0,
      "completions/mean_length": 592.9832763671875,
      "completions/mean_terminated_length": 553.4458618164062,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 5.139982502187227,
      "grad_norm": 0.11181640625,
      "learning_rate": 1e-06,
      "loss": 0.0079,
      "num_tokens": 319622818.0,
      "reward": 0.4520089626312256,
      "reward_std": 0.15977369248867035,
      "rewards/verify_math_reward/mean": 0.4520089328289032,
      "rewards/verify_math_reward/std": 0.49796947836875916,
      "step": 550
    },
    {
      "clip_ratio/high_max": 0.0018136503895220812,
      "clip_ratio/high_mean": 0.00048139415093828575,
      "clip_ratio/low_mean": 0.0003645178137503535,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008459119626422762,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2129.0,
      "completions/mean_length": 545.9553833007812,
      "completions/mean_terminated_length": 518.0022583007812,
      "completions/min_length": 59.0,
      "completions/min_terminated_length": 59.0,
      "epoch": 5.1493146689997085,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0046,
      "num_tokens": 320171658.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.18333503603935242,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 551
    },
    {
      "clip_ratio/high_max": 0.0017637156015553046,
      "clip_ratio/high_mean": 0.0005619154640044144,
      "clip_ratio/low_mean": 0.00030062308655942616,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008625385489722248,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3447.0,
      "completions/mean_length": 636.1517944335938,
      "completions/mean_terminated_length": 601.0462036132812,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 5.15864683581219,
      "grad_norm": 0.1171875,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 320792066.0,
      "reward": 0.5256696939468384,
      "reward_std": 0.22361180186271667,
      "rewards/verify_math_reward/mean": 0.5256696343421936,
      "rewards/verify_math_reward/std": 0.4996195435523987,
      "step": 552
    },
    {
      "clip_ratio/high_max": 0.0013090748489048565,
      "clip_ratio/high_mean": 0.00036141685393431544,
      "clip_ratio/low_mean": 0.00035929172508986085,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007207085664049373,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3668.0,
      "completions/mean_length": 593.4386596679688,
      "completions/mean_terminated_length": 557.899658203125,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 5.167979002624672,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0042,
      "num_tokens": 321373147.0,
      "reward": 0.551339328289032,
      "reward_std": 0.20215661823749542,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 553
    },
    {
      "clip_ratio/high_max": 0.001539472751574067,
      "clip_ratio/high_mean": 0.00043860073469659255,
      "clip_ratio/low_mean": 0.0003451004399721569,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000783701170803397,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3065.0,
      "completions/mean_length": 628.9453125,
      "completions/mean_terminated_length": 569.9149169921875,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 5.177311169437154,
      "grad_norm": 0.11865234375,
      "learning_rate": 1e-06,
      "loss": -0.0004,
      "num_tokens": 321962658.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.20426209270954132,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 554
    },
    {
      "clip_ratio/high_max": 0.0013414804416242987,
      "clip_ratio/high_mean": 0.00043196804767831054,
      "clip_ratio/low_mean": 0.00035095157750220096,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007829196079001122,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3377.0,
      "completions/mean_length": 633.3236694335938,
      "completions/mean_terminated_length": 574.3677978515625,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 5.186643336249635,
      "grad_norm": 0.11962890625,
      "learning_rate": 1e-06,
      "loss": 0.0026,
      "num_tokens": 322554580.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.2213120013475418,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915629625320435,
      "step": 555
    },
    {
      "clip_ratio/high_max": 0.0014922396949259564,
      "clip_ratio/high_mean": 0.0004467867923949598,
      "clip_ratio/low_mean": 0.00029836162491392315,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007451484184457513,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3797.0,
      "completions/mean_length": 591.4475708007812,
      "completions/mean_terminated_length": 547.8881225585938,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 5.195975503062117,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": -0.0034,
      "num_tokens": 323130573.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.20819656550884247,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 556
    },
    {
      "clip_ratio/high_max": 0.001416543123923475,
      "clip_ratio/high_mean": 0.0004352897617536655,
      "clip_ratio/low_mean": 0.0003223823791813629,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007576721350233129,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2090.0,
      "completions/mean_length": 653.6484375,
      "completions/mean_terminated_length": 595.0386352539062,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 5.205307669874599,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": -0.0033,
      "num_tokens": 323743770.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.2077540010213852,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943605065345764,
      "step": 557
    },
    {
      "clip_ratio/high_max": 0.0016756152781454148,
      "clip_ratio/high_mean": 0.0005096179843349091,
      "clip_ratio/low_mean": 0.0005116133927458577,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001021231357299257,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3703.0,
      "completions/mean_length": 607.3236694335938,
      "completions/mean_terminated_length": 555.9614868164062,
      "completions/min_length": 74.0,
      "completions/min_terminated_length": 74.0,
      "epoch": 5.2146398366870805,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": -0.0082,
      "num_tokens": 324330212.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.2486380934715271,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 558
    },
    {
      "clip_ratio/high_max": 0.0015683017918490805,
      "clip_ratio/high_mean": 0.0004760605506817228,
      "clip_ratio/low_mean": 0.0004015500310288189,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008776105796641787,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4070.0,
      "completions/mean_length": 646.7645263671875,
      "completions/mean_terminated_length": 572.03759765625,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 5.223972003499562,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 324917201.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.23758603632450104,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689778685569763,
      "step": 559
    },
    {
      "clip_ratio/high_max": 0.0014849870021862444,
      "clip_ratio/high_mean": 0.0004950290035594662,
      "clip_ratio/low_mean": 0.0002708091083150066,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007658381196051778,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2493.0,
      "completions/mean_length": 614.6495971679688,
      "completions/mean_terminated_length": 547.3196411132812,
      "completions/min_length": 32.0,
      "completions/min_terminated_length": 32.0,
      "epoch": 5.233304170312044,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 325488015.0,
      "reward": 0.4888392984867096,
      "reward_std": 0.19798003137111664,
      "rewards/verify_math_reward/mean": 0.4888392984867096,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 560
    },
    {
      "clip_ratio/high_max": 0.0013966550609438855,
      "clip_ratio/high_mean": 0.00037200242871904265,
      "clip_ratio/low_mean": 0.00021488170978045673,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0005868841367373534,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4033.0,
      "completions/mean_length": 630.3069458007812,
      "completions/mean_terminated_length": 567.2943115234375,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 5.242636337124526,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.0173,
      "num_tokens": 326078362.0,
      "reward": 0.582589328289032,
      "reward_std": 0.16311585903167725,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.4934072494506836,
      "step": 561
    },
    {
      "clip_ratio/high_max": 0.0016356369869754417,
      "clip_ratio/high_mean": 0.0005107918336761941,
      "clip_ratio/low_mean": 0.00031867606321611675,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000829467900985037,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1975.0,
      "completions/mean_length": 573.1685791015625,
      "completions/mean_terminated_length": 541.4313354492188,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 5.251968503937007,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0079,
      "num_tokens": 326644473.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.2242858111858368,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 562
    },
    {
      "clip_ratio/high_max": 0.0015790890902280807,
      "clip_ratio/high_mean": 0.0005361258564562377,
      "clip_ratio/low_mean": 0.00045058669729769463,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009867125536402455,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2186.0,
      "completions/mean_length": 601.5480346679688,
      "completions/mean_terminated_length": 554.1119995117188,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 5.26130067074949,
      "grad_norm": 0.1435546875,
      "learning_rate": 1e-06,
      "loss": 0.0102,
      "num_tokens": 327219580.0,
      "reward": 0.5145089626312256,
      "reward_std": 0.2533339262008667,
      "rewards/verify_math_reward/mean": 0.5145089030265808,
      "rewards/verify_math_reward/std": 0.5000686049461365,
      "step": 563
    },
    {
      "clip_ratio/high_max": 0.0015544271627732087,
      "clip_ratio/high_mean": 0.00044894331995237735,
      "clip_ratio/low_mean": 0.0003341211422593915,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007830644690329791,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2676.0,
      "completions/mean_length": 673.8928833007812,
      "completions/mean_terminated_length": 603.7357788085938,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 5.270632837561972,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0064,
      "num_tokens": 327849308.0,
      "reward": 0.447544664144516,
      "reward_std": 0.21725627779960632,
      "rewards/verify_math_reward/mean": 0.4475446343421936,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 564
    },
    {
      "clip_ratio/high_max": 0.0016612288181931945,
      "clip_ratio/high_mean": 0.00047205458122334676,
      "clip_ratio/low_mean": 0.0005131374111897458,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009851919849097612,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2925.0,
      "completions/mean_length": 568.7265625,
      "completions/mean_terminated_length": 544.9472045898438,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 5.2799650043744535,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0042,
      "num_tokens": 328425319.0,
      "reward": 0.535714328289032,
      "reward_std": 0.25066685676574707,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990013837814331,
      "step": 565
    },
    {
      "clip_ratio/high_max": 0.0017810704557632562,
      "clip_ratio/high_mean": 0.0004967451700395031,
      "clip_ratio/low_mean": 0.00039699774970358703,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008937429438446998,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2700.0,
      "completions/mean_length": 581.0670166015625,
      "completions/mean_terminated_length": 545.4024658203125,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 5.289297171186935,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0142,
      "num_tokens": 328994995.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.2088063806295395,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 566
    },
    {
      "clip_ratio/high_max": 0.0017589321250852663,
      "clip_ratio/high_mean": 0.0004996654330398087,
      "clip_ratio/low_mean": 0.0002853683243984051,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007850337747186131,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3904.0,
      "completions/mean_length": 624.4319458007812,
      "completions/mean_terminated_length": 573.3215942382812,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 5.298629337999417,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0016,
      "num_tokens": 329586702.0,
      "reward": 0.515625,
      "reward_std": 0.24036115407943726,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 567
    },
    {
      "clip_ratio/high_max": 0.001833881327911513,
      "clip_ratio/high_mean": 0.0005528118585971242,
      "clip_ratio/low_mean": 0.00028448922512325225,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008373010800823977,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2299.0,
      "completions/mean_length": 616.3125,
      "completions/mean_terminated_length": 540.9258422851562,
      "completions/min_length": 91.0,
      "completions/min_terminated_length": 91.0,
      "epoch": 5.307961504811899,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": -0.0037,
      "num_tokens": 330147854.0,
      "reward": 0.582589328289032,
      "reward_std": 0.1862967312335968,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.4934072494506836,
      "step": 568
    },
    {
      "clip_ratio/high_max": 0.0013719777434744174,
      "clip_ratio/high_mean": 0.00037502635007058416,
      "clip_ratio/low_mean": 0.0003804965200515653,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007555228744422493,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4067.0,
      "completions/mean_length": 627.7366333007812,
      "completions/mean_terminated_length": 568.6856079101562,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 5.31729367162438,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0043,
      "num_tokens": 330741122.0,
      "reward": 0.5256696939468384,
      "reward_std": 0.2009527087211609,
      "rewards/verify_math_reward/mean": 0.5256696343421936,
      "rewards/verify_math_reward/std": 0.4996195435523987,
      "step": 569
    },
    {
      "clip_ratio/high_max": 0.0016941607818807825,
      "clip_ratio/high_mean": 0.0005355355674510065,
      "clip_ratio/low_mean": 0.00033066106607293477,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008661966294312151,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3053.0,
      "completions/mean_length": 647.3850708007812,
      "completions/mean_terminated_length": 572.6715698242188,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 5.326625838436862,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0089,
      "num_tokens": 331328323.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.22282226383686066,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943605065345764,
      "step": 570
    },
    {
      "clip_ratio/high_max": 0.0018209992931588204,
      "clip_ratio/high_mean": 0.0005488518972924794,
      "clip_ratio/low_mean": 0.0004011082201031968,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009499601151219395,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3269.0,
      "completions/mean_length": 617.2042846679688,
      "completions/mean_terminated_length": 549.9237670898438,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 5.335958005249344,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0103,
      "num_tokens": 331893930.0,
      "reward": 0.574776828289032,
      "reward_std": 0.2292449027299881,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 571
    },
    {
      "clip_ratio/high_max": 0.0015630527213943424,
      "clip_ratio/high_mean": 0.0004424715377808752,
      "clip_ratio/low_mean": 0.00027531418140824826,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007177857105489238,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2148.0,
      "completions/mean_length": 590.4631958007812,
      "completions/mean_terminated_length": 550.8972778320312,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 5.3452901720618256,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.0065,
      "num_tokens": 332474713.0,
      "reward": 0.4966517984867096,
      "reward_std": 0.21894694864749908,
      "rewards/verify_math_reward/mean": 0.4966517984867096,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 572
    },
    {
      "clip_ratio/high_max": 0.0016773431261754013,
      "clip_ratio/high_mean": 0.0005180152492130219,
      "clip_ratio/low_mean": 0.0003890500597663049,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000907065300452814,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3431.0,
      "completions/mean_length": 608.5100708007812,
      "completions/mean_terminated_length": 549.1317138671875,
      "completions/min_length": 87.0,
      "completions/min_terminated_length": 87.0,
      "epoch": 5.354622338874307,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0136,
      "num_tokens": 333053074.0,
      "reward": 0.590401828289032,
      "reward_std": 0.22781910002231598,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 573
    },
    {
      "clip_ratio/high_max": 0.0015063100163388299,
      "clip_ratio/high_mean": 0.00047234158000719617,
      "clip_ratio/low_mean": 0.000319266402584617,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007916079744063609,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2021.0,
      "completions/mean_length": 548.0,
      "completions/mean_terminated_length": 503.90057373046875,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "epoch": 5.363954505686789,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0094,
      "num_tokens": 333589578.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.19550618529319763,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 574
    },
    {
      "clip_ratio/high_max": 0.001743977880323655,
      "clip_ratio/high_mean": 0.0005185850332054542,
      "clip_ratio/low_mean": 0.0002559582160301943,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007745432667434216,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3274.0,
      "completions/mean_length": 654.4375,
      "completions/mean_terminated_length": 591.8636474609375,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 5.373286672499271,
      "grad_norm": 0.11767578125,
      "learning_rate": 1e-06,
      "loss": -0.0099,
      "num_tokens": 334202898.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.20478273928165436,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 575
    },
    {
      "clip_ratio/high_max": 0.0017537659587105736,
      "clip_ratio/high_mean": 0.0004901738057014882,
      "clip_ratio/low_mean": 0.0003412311627926101,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008314049746331875,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2358.0,
      "completions/mean_length": 621.1038208007812,
      "completions/mean_terminated_length": 549.864501953125,
      "completions/min_length": 15.0,
      "completions/min_terminated_length": 15.0,
      "epoch": 5.3826188393117524,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0019,
      "num_tokens": 334782223.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.22882941365242004,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 576
    },
    {
      "clip_ratio/high_max": 0.0012921654160891194,
      "clip_ratio/high_mean": 0.0003068975581754785,
      "clip_ratio/low_mean": 0.0002428725406389276,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0005497700994965271,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3291.0,
      "completions/mean_length": 640.5736694335938,
      "completions/mean_terminated_length": 569.7335205078125,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 5.391951006124234,
      "grad_norm": 0.1171875,
      "learning_rate": 1e-06,
      "loss": -0.0052,
      "num_tokens": 335369025.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.1888531893491745,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 577
    },
    {
      "clip_ratio/high_max": 0.0015022727311588824,
      "clip_ratio/high_mean": 0.00045029936018181616,
      "clip_ratio/low_mean": 0.000329555451799024,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007798548226674029,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2838.0,
      "completions/mean_length": 577.747802734375,
      "completions/mean_terminated_length": 542.049560546875,
      "completions/min_length": 33.0,
      "completions/min_terminated_length": 33.0,
      "epoch": 5.401283172936716,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0027,
      "num_tokens": 335945447.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.20580121874809265,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263837933540344,
      "step": 578
    },
    {
      "clip_ratio/high_max": 0.0015232671194098657,
      "clip_ratio/high_mean": 0.0004816653081434197,
      "clip_ratio/low_mean": 0.0003052244320542741,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000786889742812491,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4004.0,
      "completions/mean_length": 665.099365234375,
      "completions/mean_terminated_length": 598.7451171875,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 5.410615339749198,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0147,
      "num_tokens": 336565784.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.21312426030635834,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 579
    },
    {
      "clip_ratio/high_max": 0.0018734333134489134,
      "clip_ratio/high_mean": 0.0005695729123544879,
      "clip_ratio/low_mean": 0.00034296873502626113,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009125416563620092,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3944.0,
      "completions/mean_length": 627.1060791015625,
      "completions/mean_terminated_length": 560.0170288085938,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 5.41994750656168,
      "grad_norm": 0.1455078125,
      "learning_rate": 1e-06,
      "loss": 0.0091,
      "num_tokens": 337143775.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.22883333265781403,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 580
    },
    {
      "clip_ratio/high_max": 0.0016861082176546915,
      "clip_ratio/high_mean": 0.0004648963398494743,
      "clip_ratio/low_mean": 0.0003388802524568746,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008037765906010463,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3145.0,
      "completions/mean_length": 586.6964721679688,
      "completions/mean_terminated_length": 543.0780029296875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 5.429279673374162,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.0076,
      "num_tokens": 337710623.0,
      "reward": 0.559151828289032,
      "reward_std": 0.20156000554561615,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 581
    },
    {
      "clip_ratio/high_max": 0.001424003468855517,
      "clip_ratio/high_mean": 0.00037773066139834555,
      "clip_ratio/low_mean": 0.0003189537942489551,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006966844571252295,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2821.0,
      "completions/mean_length": 643.5580444335938,
      "completions/mean_terminated_length": 564.735107421875,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 5.438611840186644,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0234,
      "num_tokens": 338286859.0,
      "reward": 0.5703125,
      "reward_std": 0.18426935374736786,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 582
    },
    {
      "clip_ratio/high_max": 0.0012338231508692843,
      "clip_ratio/high_mean": 0.00044580090559520613,
      "clip_ratio/low_mean": 0.00031469859845856263,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007604994998473558,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2355.0,
      "completions/mean_length": 579.0324096679688,
      "completions/mean_terminated_length": 535.3186645507812,
      "completions/min_length": 6.0,
      "completions/min_terminated_length": 6.0,
      "epoch": 5.447944006999125,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0006,
      "num_tokens": 338843504.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.22800594568252563,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 583
    },
    {
      "clip_ratio/high_max": 0.0021068601017759647,
      "clip_ratio/high_mean": 0.0006619417997626442,
      "clip_ratio/low_mean": 0.00037777678517159075,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001039718593347061,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1972.0,
      "completions/mean_length": 582.5614013671875,
      "completions/mean_terminated_length": 554.8965454101562,
      "completions/min_length": 67.0,
      "completions/min_terminated_length": 67.0,
      "epoch": 5.457276173811607,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0056,
      "num_tokens": 339421447.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.24487334489822388,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 584
    },
    {
      "clip_ratio/high_max": 0.0016412534496339504,
      "clip_ratio/high_mean": 0.0004804542854799365,
      "clip_ratio/low_mean": 0.000321047708439437,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008015020030143205,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3328.0,
      "completions/mean_length": 660.6205444335938,
      "completions/mean_terminated_length": 586.19384765625,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 5.466608340624089,
      "grad_norm": 0.11962890625,
      "learning_rate": 1e-06,
      "loss": -0.0003,
      "num_tokens": 340032507.0,
      "reward": 0.5234375,
      "reward_std": 0.2059527486562729,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 585
    },
    {
      "clip_ratio/high_max": 0.0015810954027983826,
      "clip_ratio/high_mean": 0.0005144460722021904,
      "clip_ratio/low_mean": 0.0003890473876708711,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009034934582814458,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2884.0,
      "completions/mean_length": 613.6808471679688,
      "completions/mean_terminated_length": 542.289306640625,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 5.475940507436571,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0063,
      "num_tokens": 340596373.0,
      "reward": 0.515625,
      "reward_std": 0.22308439016342163,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 586
    },
    {
      "clip_ratio/high_max": 0.0014493896469502943,
      "clip_ratio/high_mean": 0.0003946472652387456,
      "clip_ratio/low_mean": 0.000327332647430012,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007219799072117894,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3835.0,
      "completions/mean_length": 624.6842041015625,
      "completions/mean_terminated_length": 601.2820434570312,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 5.485272674249052,
      "grad_norm": 0.11767578125,
      "learning_rate": 1e-06,
      "loss": 0.0147,
      "num_tokens": 341230130.0,
      "reward": 0.4877232313156128,
      "reward_std": 0.18952901661396027,
      "rewards/verify_math_reward/mean": 0.4877232015132904,
      "rewards/verify_math_reward/std": 0.500128448009491,
      "step": 587
    },
    {
      "clip_ratio/high_max": 0.0013787154102828936,
      "clip_ratio/high_mean": 0.0004109978194719588,
      "clip_ratio/low_mean": 0.0003107044553871674,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007217022816803365,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3268.0,
      "completions/mean_length": 653.9553833007812,
      "completions/mean_terminated_length": 567.3134765625,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 5.494604841061534,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0004,
      "num_tokens": 341823634.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.22240857779979706,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 588
    },
    {
      "clip_ratio/high_max": 0.0016527361967746401,
      "clip_ratio/high_mean": 0.000604953545916942,
      "clip_ratio/low_mean": 0.0003452273822404095,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009501809254288673,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3403.0,
      "completions/mean_length": 618.625,
      "completions/mean_terminated_length": 559.4188842773438,
      "completions/min_length": 90.0,
      "completions/min_terminated_length": 90.0,
      "epoch": 5.503937007874016,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0088,
      "num_tokens": 342402138.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.2408929020166397,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 589
    },
    {
      "clip_ratio/high_max": 0.0019028083952434827,
      "clip_ratio/high_mean": 0.0005872474728221277,
      "clip_ratio/low_mean": 0.00035215872594562825,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009394062053615926,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2184.0,
      "completions/mean_length": 544.7734375,
      "completions/mean_terminated_length": 508.74066162109375,
      "completions/min_length": 51.0,
      "completions/min_terminated_length": 51.0,
      "epoch": 5.5132691746864975,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0233,
      "num_tokens": 342937111.0,
      "reward": 0.5703125,
      "reward_std": 0.2225145399570465,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 590
    },
    {
      "clip_ratio/high_max": 0.0014833757168162265,
      "clip_ratio/high_mean": 0.000418451493487737,
      "clip_ratio/low_mean": 0.00046329974998116086,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008817512316454668,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3757.0,
      "completions/mean_length": 645.1038208007812,
      "completions/mean_terminated_length": 562.2822875976562,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 5.522601341498979,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0076,
      "num_tokens": 343525188.0,
      "reward": 0.527901828289032,
      "reward_std": 0.22229093313217163,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 591
    },
    {
      "clip_ratio/high_max": 0.0015365254057542188,
      "clip_ratio/high_mean": 0.0004624969318456351,
      "clip_ratio/low_mean": 0.00023578075342811644,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006982776862969331,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2926.0,
      "completions/mean_length": 600.8873291015625,
      "completions/mean_terminated_length": 557.4451904296875,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 5.531933508311461,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.013,
      "num_tokens": 344119759.0,
      "reward": 0.5390625,
      "reward_std": 0.20512789487838745,
      "rewards/verify_math_reward/mean": 0.5390625,
      "rewards/verify_math_reward/std": 0.4987502098083496,
      "step": 592
    },
    {
      "clip_ratio/high_max": 0.0016532313657080522,
      "clip_ratio/high_mean": 0.0005328238598849566,
      "clip_ratio/low_mean": 0.000368635616496249,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009014594506879803,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2424.0,
      "completions/mean_length": 565.2109375,
      "completions/mean_terminated_length": 537.4094848632812,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 5.541265675123943,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0116,
      "num_tokens": 344693836.0,
      "reward": 0.578125,
      "reward_std": 0.2285715937614441,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 593
    },
    {
      "clip_ratio/high_max": 0.0016362896258215187,
      "clip_ratio/high_mean": 0.0004772021652570402,
      "clip_ratio/low_mean": 0.00041457257748334087,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008917747445593704,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3985.0,
      "completions/mean_length": 684.950927734375,
      "completions/mean_terminated_length": 607.0730590820312,
      "completions/min_length": 93.0,
      "completions/min_terminated_length": 93.0,
      "epoch": 5.550597841936424,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0089,
      "num_tokens": 345310208.0,
      "reward": 0.520089328289032,
      "reward_std": 0.2350693941116333,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 594
    },
    {
      "clip_ratio/high_max": 0.0014269785442593275,
      "clip_ratio/high_mean": 0.00038235130114117055,
      "clip_ratio/low_mean": 0.0004296153347240761,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008119666417769622,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3296.0,
      "completions/mean_length": 641.1953125,
      "completions/mean_terminated_length": 574.3788452148438,
      "completions/min_length": 97.0,
      "completions/min_terminated_length": 97.0,
      "epoch": 5.559930008748906,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0071,
      "num_tokens": 345922127.0,
      "reward": 0.4665178656578064,
      "reward_std": 0.22887356579303741,
      "rewards/verify_math_reward/mean": 0.4665178656578064,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 595
    },
    {
      "clip_ratio/high_max": 0.0016614030055279727,
      "clip_ratio/high_mean": 0.0005471608137668227,
      "clip_ratio/low_mean": 0.00035743593355164194,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000904596743566799,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3889.0,
      "completions/mean_length": 617.5,
      "completions/mean_terminated_length": 566.2876586914062,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 5.569262175561388,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0016,
      "num_tokens": 346497959.0,
      "reward": 0.535714328289032,
      "reward_std": 0.23427662253379822,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 596
    },
    {
      "clip_ratio/high_max": 0.0013901961174269672,
      "clip_ratio/high_mean": 0.00041155310964313685,
      "clip_ratio/low_mean": 0.00027649343553548533,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006880465534777613,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3500.0,
      "completions/mean_length": 603.739990234375,
      "completions/mean_terminated_length": 544.2803955078125,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 5.57859434237387,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0034,
      "num_tokens": 347055054.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.17507019639015198,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 597
    },
    {
      "clip_ratio/high_max": 0.0017493439572717762,
      "clip_ratio/high_mean": 0.0005472518530496018,
      "clip_ratio/low_mean": 0.00026612177282459015,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008133736073432374,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4075.0,
      "completions/mean_length": 640.8136596679688,
      "completions/mean_terminated_length": 565.9578247070312,
      "completions/min_length": 71.0,
      "completions/min_terminated_length": 71.0,
      "epoch": 5.587926509186351,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": -0.0045,
      "num_tokens": 347635911.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.21083195507526398,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.4999600946903229,
      "step": 598
    },
    {
      "clip_ratio/high_max": 0.0014640272147516953,
      "clip_ratio/high_mean": 0.00047749230498084216,
      "clip_ratio/low_mean": 0.0003387007839137368,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008161930854839738,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3855.0,
      "completions/mean_length": 659.5424194335938,
      "completions/mean_terminated_length": 577.0674438476562,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 5.597258675998834,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0019,
      "num_tokens": 348232437.0,
      "reward": 0.4877232313156128,
      "reward_std": 0.2295454740524292,
      "rewards/verify_math_reward/mean": 0.4877232015132904,
      "rewards/verify_math_reward/std": 0.500128448009491,
      "step": 599
    },
    {
      "clip_ratio/high_max": 0.0017931208994923509,
      "clip_ratio/high_mean": 0.000473861554951327,
      "clip_ratio/low_mean": 0.000331737685655753,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008055992452682403,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3956.0,
      "completions/mean_length": 675.8627319335938,
      "completions/mean_terminated_length": 577.6957397460938,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 5.606590842811316,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0046,
      "num_tokens": 348822922.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.21421900391578674,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 600
    },
    {
      "clip_ratio/high_max": 0.0016256882372545078,
      "clip_ratio/high_mean": 0.000549583998918024,
      "clip_ratio/low_mean": 0.0004123904088828567,
      "clip_ratio/low_min": 1.0665528861864004e-05,
      "clip_ratio/region_mean": 0.0009619744059818913,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3994.0,
      "completions/mean_length": 584.552490234375,
      "completions/mean_terminated_length": 560.8797607421875,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 5.615923009623797,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0077,
      "num_tokens": 349410545.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.24968752264976501,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 601
    },
    {
      "clip_ratio/high_max": 0.0013063722926744958,
      "clip_ratio/high_mean": 0.0003865236153615115,
      "clip_ratio/low_mean": 0.00041439876008553256,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008009223861336068,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3812.0,
      "completions/mean_length": 579.1998291015625,
      "completions/mean_terminated_length": 551.5084838867188,
      "completions/min_length": 57.0,
      "completions/min_terminated_length": 57.0,
      "epoch": 5.625255176436279,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0129,
      "num_tokens": 349980436.0,
      "reward": 0.5625,
      "reward_std": 0.23258204758167267,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 602
    },
    {
      "clip_ratio/high_max": 0.0016484307761857053,
      "clip_ratio/high_mean": 0.000552778579731239,
      "clip_ratio/low_mean": 0.00043712548176699784,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009899040687741945,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3108.0,
      "completions/mean_length": 615.982177734375,
      "completions/mean_terminated_length": 564.7474365234375,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 5.634587343248761,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": -0.003,
      "num_tokens": 350573884.0,
      "reward": 0.5703125,
      "reward_std": 0.2594209611415863,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 603
    },
    {
      "clip_ratio/high_max": 0.0018310111427126685,
      "clip_ratio/high_mean": 0.0005517259573935007,
      "clip_ratio/low_mean": 0.00037904168902969104,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009307676305070345,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4056.0,
      "completions/mean_length": 637.4955444335938,
      "completions/mean_terminated_length": 598.4605102539062,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 5.6439195100612425,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": -0.0067,
      "num_tokens": 351190560.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.22905904054641724,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 604
    },
    {
      "clip_ratio/high_max": 0.001524799096841889,
      "clip_ratio/high_mean": 0.00046769564380610973,
      "clip_ratio/low_mean": 0.00047231378539436264,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009400094295415329,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2504.0,
      "completions/mean_length": 584.9642944335938,
      "completions/mean_terminated_length": 545.3363647460938,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 5.653251676873724,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0021,
      "num_tokens": 351763216.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.22330942749977112,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652786254883,
      "step": 605
    },
    {
      "clip_ratio/high_max": 0.0016735976951167686,
      "clip_ratio/high_mean": 0.0004567844382563635,
      "clip_ratio/low_mean": 0.0003422894055802317,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007990738413354848,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3886.0,
      "completions/mean_length": 585.1395263671875,
      "completions/mean_terminated_length": 545.5135498046875,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 5.662583843686206,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.006,
      "num_tokens": 352333725.0,
      "reward": 0.559151828289032,
      "reward_std": 0.23142297565937042,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 606
    },
    {
      "clip_ratio/high_max": 0.0016542480852876906,
      "clip_ratio/high_mean": 0.0005613918172002741,
      "clip_ratio/low_mean": 0.00032017077523960324,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008815625860734144,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3874.0,
      "completions/mean_length": 611.34375,
      "completions/mean_terminated_length": 560.040771484375,
      "completions/min_length": 42.0,
      "completions/min_terminated_length": 42.0,
      "epoch": 5.671916010498688,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0168,
      "num_tokens": 352915033.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.20793946087360382,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.4876568913459778,
      "step": 607
    },
    {
      "clip_ratio/high_max": 0.001837543752117199,
      "clip_ratio/high_mean": 0.0005368785557493538,
      "clip_ratio/low_mean": 0.00026023138480013586,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007971099412316107,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3735.0,
      "completions/mean_length": 548.4944458007812,
      "completions/mean_terminated_length": 512.4993896484375,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 5.681248177311169,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.004,
      "num_tokens": 353463684.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.18971771001815796,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 608
    },
    {
      "clip_ratio/high_max": 0.0018288902429048903,
      "clip_ratio/high_mean": 0.0005793526834168006,
      "clip_ratio/low_mean": 0.0003307258682525571,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009100785482587526,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3042.0,
      "completions/mean_length": 633.583740234375,
      "completions/mean_terminated_length": 578.6246948242188,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 5.690580344123651,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0083,
      "num_tokens": 354063415.0,
      "reward": 0.5256696939468384,
      "reward_std": 0.22857119143009186,
      "rewards/verify_math_reward/mean": 0.5256696343421936,
      "rewards/verify_math_reward/std": 0.4996195137500763,
      "step": 609
    },
    {
      "clip_ratio/high_max": 0.0015159009162744042,
      "clip_ratio/high_mean": 0.00041560329555068165,
      "clip_ratio/low_mean": 0.00027500016324211174,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006906034586791066,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4034.0,
      "completions/mean_length": 538.8772583007812,
      "completions/mean_terminated_length": 514.8966064453125,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 5.699912510936133,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0176,
      "num_tokens": 354613161.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.17784711718559265,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.49075525999069214,
      "step": 610
    },
    {
      "clip_ratio/high_max": 0.001717625229503028,
      "clip_ratio/high_mean": 0.0004708107526312233,
      "clip_ratio/low_mean": 0.000301633516187394,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007724442730250303,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2423.0,
      "completions/mean_length": 582.0267944335938,
      "completions/mean_terminated_length": 554.3577270507812,
      "completions/min_length": 27.0,
      "completions/min_terminated_length": 27.0,
      "epoch": 5.7092446777486145,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0088,
      "num_tokens": 355201385.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.21463268995285034,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.49828118085861206,
      "step": 611
    },
    {
      "clip_ratio/high_max": 0.0018678093802009244,
      "clip_ratio/high_mean": 0.000609123405297396,
      "clip_ratio/low_mean": 0.00041631638282524364,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010254397884637,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2777.0,
      "completions/mean_length": 614.341552734375,
      "completions/mean_terminated_length": 567.0792236328125,
      "completions/min_length": 2.0,
      "completions/min_terminated_length": 2.0,
      "epoch": 5.718576844561096,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0098,
      "num_tokens": 355802331.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.24446289241313934,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 612
    },
    {
      "clip_ratio/high_max": 0.0015789622739248443,
      "clip_ratio/high_mean": 0.0004802795135674387,
      "clip_ratio/low_mean": 0.0002489008927568648,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007291804026863247,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2506.0,
      "completions/mean_length": 614.5592041015625,
      "completions/mean_terminated_length": 535.0741577148438,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 5.727909011373578,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0021,
      "num_tokens": 356360712.0,
      "reward": 0.551339328289032,
      "reward_std": 0.18490806221961975,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 613
    },
    {
      "clip_ratio/high_max": 0.0014210480057954555,
      "clip_ratio/high_mean": 0.00043928807838256034,
      "clip_ratio/low_mean": 0.00035182789724785835,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007911159791547107,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4019.0,
      "completions/mean_length": 598.2232666015625,
      "completions/mean_terminated_length": 542.7029418945312,
      "completions/min_length": 74.0,
      "completions/min_terminated_length": 74.0,
      "epoch": 5.73724117818606,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": -0.0115,
      "num_tokens": 356933112.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.2124505490064621,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 614
    },
    {
      "clip_ratio/high_max": 0.001356075529656664,
      "clip_ratio/high_mean": 0.0004500520685724041,
      "clip_ratio/low_mean": 0.00033761252723252255,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007876646013755817,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2545.0,
      "completions/mean_length": 604.6920166015625,
      "completions/mean_terminated_length": 557.2986450195312,
      "completions/min_length": 59.0,
      "completions/min_terminated_length": 59.0,
      "epoch": 5.746573344998541,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0154,
      "num_tokens": 357514260.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.21937061846256256,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 615
    },
    {
      "clip_ratio/high_max": 0.0020445919399207924,
      "clip_ratio/high_mean": 0.0006977882353567111,
      "clip_ratio/low_mean": 0.00032132818421359843,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010191164237767225,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3926.0,
      "completions/mean_length": 628.6395263671875,
      "completions/mean_terminated_length": 557.5546875,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "epoch": 5.755905511811024,
      "grad_norm": 0.1484375,
      "learning_rate": 1e-06,
      "loss": 0.0185,
      "num_tokens": 358096625.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.253483384847641,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 616
    },
    {
      "clip_ratio/high_max": 0.0017153845365101006,
      "clip_ratio/high_mean": 0.0005488036445058242,
      "clip_ratio/low_mean": 0.0003400926821086614,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008888963166100439,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3174.0,
      "completions/mean_length": 594.0435791015625,
      "completions/mean_terminated_length": 542.48583984375,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 5.765237678623506,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0044,
      "num_tokens": 358661976.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.2055736631155014,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689778685569763,
      "step": 617
    },
    {
      "clip_ratio/high_max": 0.0015714221190137323,
      "clip_ratio/high_mean": 0.00046793652518317685,
      "clip_ratio/low_mean": 0.0002259613844444175,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006938979076949181,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4054.0,
      "completions/mean_length": 653.9152221679688,
      "completions/mean_terminated_length": 611.1322021484375,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 5.7745698454359875,
      "grad_norm": 0.1123046875,
      "learning_rate": 1e-06,
      "loss": 0.0014,
      "num_tokens": 359287116.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.2069612443447113,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 618
    },
    {
      "clip_ratio/high_max": 0.001240374596818583,
      "clip_ratio/high_mean": 0.00037524275262512674,
      "clip_ratio/low_mean": 0.00035552919950987416,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007307719456548512,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3951.0,
      "completions/mean_length": 627.8359375,
      "completions/mean_terminated_length": 552.698974609375,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 5.783902012248469,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.002,
      "num_tokens": 359861337.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.19418711960315704,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 619
    },
    {
      "clip_ratio/high_max": 0.0017908200443343958,
      "clip_ratio/high_mean": 0.0005974826347028284,
      "clip_ratio/low_mean": 0.0002967977391108434,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008942803697209456,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2140.0,
      "completions/mean_length": 549.5892944335938,
      "completions/mean_terminated_length": 521.664794921875,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 5.793234179060951,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0022,
      "num_tokens": 360421977.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.25998368859291077,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796950817108154,
      "step": 620
    },
    {
      "clip_ratio/high_max": 0.0013891612707084278,
      "clip_ratio/high_mean": 0.00039783442730367824,
      "clip_ratio/low_mean": 0.00032748142780292255,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007253158555613481,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3945.0,
      "completions/mean_length": 599.9017944335938,
      "completions/mean_terminated_length": 548.4303588867188,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 5.802566345873433,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": 0.0007,
      "num_tokens": 360993321.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.19531185925006866,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 621
    },
    {
      "clip_ratio/high_max": 0.0020891256326649454,
      "clip_ratio/high_mean": 0.0006852979413451976,
      "clip_ratio/low_mean": 0.00036986430779961665,
      "clip_ratio/low_min": 9.530344868835527e-06,
      "clip_ratio/region_mean": 0.001055162253578601,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4015.0,
      "completions/mean_length": 619.2455444335938,
      "completions/mean_terminated_length": 568.0588989257812,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 5.811898512685914,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0055,
      "num_tokens": 361579125.0,
      "reward": 0.578125,
      "reward_std": 0.24014613032341003,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 622
    },
    {
      "clip_ratio/high_max": 0.00170293934570509,
      "clip_ratio/high_mean": 0.0005552408042603929,
      "clip_ratio/low_mean": 0.000346487800925388,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009017286201924435,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3962.0,
      "completions/mean_length": 642.864990234375,
      "completions/mean_terminated_length": 588.0532836914062,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 5.821230679498396,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0115,
      "num_tokens": 362198188.0,
      "reward": 0.535714328289032,
      "reward_std": 0.22628137469291687,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 623
    },
    {
      "clip_ratio/high_max": 0.001384750374199939,
      "clip_ratio/high_mean": 0.00036116401918206975,
      "clip_ratio/low_mean": 0.00028290550721976615,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006440695348146619,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3538.0,
      "completions/mean_length": 637.4308471679688,
      "completions/mean_terminated_length": 582.5328979492188,
      "completions/min_length": 79.0,
      "completions/min_terminated_length": 79.0,
      "epoch": 5.830562846310878,
      "grad_norm": 0.11376953125,
      "learning_rate": 1e-06,
      "loss": 0.0018,
      "num_tokens": 362813758.0,
      "reward": 0.4877232313156128,
      "reward_std": 0.17318309843540192,
      "rewards/verify_math_reward/mean": 0.4877232015132904,
      "rewards/verify_math_reward/std": 0.5001283884048462,
      "step": 624
    },
    {
      "clip_ratio/high_max": 0.002005233105592197,
      "clip_ratio/high_mean": 0.0005235718522271782,
      "clip_ratio/low_mean": 0.00034254040178893774,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008661122628836893,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3183.0,
      "completions/mean_length": 582.4855346679688,
      "completions/mean_terminated_length": 538.814697265625,
      "completions/min_length": 39.0,
      "completions/min_terminated_length": 39.0,
      "epoch": 5.83989501312336,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0034,
      "num_tokens": 363370241.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.21958746016025543,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 625
    },
    {
      "clip_ratio/high_max": 0.001632858402444981,
      "clip_ratio/high_mean": 0.00048267613510688534,
      "clip_ratio/low_mean": 0.00028833066107836203,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007710067802690901,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3588.0,
      "completions/mean_length": 576.6707763671875,
      "completions/mean_terminated_length": 524.8572998046875,
      "completions/min_length": 80.0,
      "completions/min_terminated_length": 80.0,
      "epoch": 5.849227179935841,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0081,
      "num_tokens": 363913666.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.1759282499551773,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 626
    },
    {
      "clip_ratio/high_max": 0.0016306336801790167,
      "clip_ratio/high_mean": 0.0004919825648812548,
      "clip_ratio/low_mean": 0.00039717310869491484,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008891556699381908,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3771.0,
      "completions/mean_length": 584.0546875,
      "completions/mean_terminated_length": 544.41650390625,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 5.858559346748323,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0068,
      "num_tokens": 364490979.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.2117016762495041,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 627
    },
    {
      "clip_ratio/high_max": 0.0017665231662249425,
      "clip_ratio/high_mean": 0.0005111080458846118,
      "clip_ratio/low_mean": 0.00038048836950110854,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008915963871913846,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3010.0,
      "completions/mean_length": 620.357177734375,
      "completions/mean_terminated_length": 565.188232421875,
      "completions/min_length": 97.0,
      "completions/min_terminated_length": 97.0,
      "epoch": 5.867891513560805,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0034,
      "num_tokens": 365069779.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.22349488735198975,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 628
    },
    {
      "clip_ratio/high_max": 0.0014437277868637466,
      "clip_ratio/high_mean": 0.0004319528011365037,
      "clip_ratio/low_mean": 0.00029387151062110206,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007258243049363955,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3923.0,
      "completions/mean_length": 607.8995971679688,
      "completions/mean_terminated_length": 556.5458374023438,
      "completions/min_length": 64.0,
      "completions/min_terminated_length": 64.0,
      "epoch": 5.8772236803732865,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 365659329.0,
      "reward": 0.5256696939468384,
      "reward_std": 0.22446875274181366,
      "rewards/verify_math_reward/mean": 0.5256696343421936,
      "rewards/verify_math_reward/std": 0.4996195435523987,
      "step": 629
    },
    {
      "clip_ratio/high_max": 0.0016678804149705684,
      "clip_ratio/high_mean": 0.0005439781342602146,
      "clip_ratio/low_mean": 0.0003042860504365308,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008482641760565457,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2244.0,
      "completions/mean_length": 582.1317138671875,
      "completions/mean_terminated_length": 538.4564819335938,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 5.886555847185768,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0001,
      "num_tokens": 366227895.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.23059743642807007,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 630
    },
    {
      "clip_ratio/high_max": 0.00126131130491558,
      "clip_ratio/high_mean": 0.0003636357573668647,
      "clip_ratio/low_mean": 0.00033631018482083164,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006999459496910276,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3798.0,
      "completions/mean_length": 559.3917846679688,
      "completions/mean_terminated_length": 531.54443359375,
      "completions/min_length": 82.0,
      "completions/min_terminated_length": 82.0,
      "epoch": 5.89588801399825,
      "grad_norm": 0.11962890625,
      "learning_rate": 1e-06,
      "loss": 0.0115,
      "num_tokens": 366788126.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.19772110879421234,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321895837783813,
      "step": 631
    },
    {
      "clip_ratio/high_max": 0.0018229607449029572,
      "clip_ratio/high_mean": 0.0005960020198472193,
      "clip_ratio/low_mean": 0.0004161727232485646,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010121747327502817,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3837.0,
      "completions/mean_length": 617.622802734375,
      "completions/mean_terminated_length": 578.3634643554688,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 5.905220180810732,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": -0.0063,
      "num_tokens": 367397508.0,
      "reward": 0.4955357313156128,
      "reward_std": 0.2520148754119873,
      "rewards/verify_math_reward/mean": 0.4955357015132904,
      "rewards/verify_math_reward/std": 0.500259280204773,
      "step": 632
    },
    {
      "clip_ratio/high_max": 0.0017330375358142192,
      "clip_ratio/high_mean": 0.0004954539767823007,
      "clip_ratio/low_mean": 0.00033041706001313287,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008258710395239177,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3841.0,
      "completions/mean_length": 583.7489013671875,
      "completions/mean_terminated_length": 544.1072387695312,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 5.914552347623214,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0027,
      "num_tokens": 367960595.0,
      "reward": 0.5703125,
      "reward_std": 0.1996813714504242,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 633
    },
    {
      "clip_ratio/high_max": 0.0013183061364543391,
      "clip_ratio/high_mean": 0.0004277631906006718,
      "clip_ratio/low_mean": 0.0003647073425554481,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000792470529631828,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4003.0,
      "completions/mean_length": 623.9498291015625,
      "completions/mean_terminated_length": 560.8215942382812,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 5.923884514435695,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.0008,
      "num_tokens": 368546358.0,
      "reward": 0.5703125,
      "reward_std": 0.23559045791625977,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 634
    },
    {
      "clip_ratio/high_max": 0.0013197403495723847,
      "clip_ratio/high_mean": 0.0004318313540352392,
      "clip_ratio/low_mean": 0.0004125800021483883,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00084441136004898,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3013.0,
      "completions/mean_length": 639.3471069335938,
      "completions/mean_terminated_length": 572.494873046875,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 5.933216681248178,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0098,
      "num_tokens": 369146981.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.23041337728500366,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 635
    },
    {
      "clip_ratio/high_max": 0.0015313922449422535,
      "clip_ratio/high_mean": 0.00042677192163864675,
      "clip_ratio/low_mean": 0.0003478974579138594,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000774669376824022,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3733.0,
      "completions/mean_length": 613.5670166015625,
      "completions/mean_terminated_length": 574.2618408203125,
      "completions/min_length": 32.0,
      "completions/min_terminated_length": 32.0,
      "epoch": 5.942548848060659,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.0082,
      "num_tokens": 369742897.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.19877348840236664,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.49702703952789307,
      "step": 636
    },
    {
      "clip_ratio/high_max": 0.001322434779467585,
      "clip_ratio/high_mean": 0.00038604866460900666,
      "clip_ratio/low_mean": 0.00029808235240125214,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006841310159870773,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3976.0,
      "completions/mean_length": 601.7310791015625,
      "completions/mean_terminated_length": 554.2975463867188,
      "completions/min_length": 91.0,
      "completions/min_terminated_length": 91.0,
      "epoch": 5.951881014873141,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": 0.0038,
      "num_tokens": 370328624.0,
      "reward": 0.520089328289032,
      "reward_std": 0.1770893931388855,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 637
    },
    {
      "clip_ratio/high_max": 0.0017185953838634305,
      "clip_ratio/high_mean": 0.0004891297135145578,
      "clip_ratio/low_mean": 0.0003073795163572868,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007965092245285632,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3333.0,
      "completions/mean_length": 686.513427734375,
      "completions/mean_terminated_length": 608.6712036132812,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 5.961213181685623,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.0022,
      "num_tokens": 370957556.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.2003137171268463,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 638
    },
    {
      "clip_ratio/high_max": 0.001580068470502738,
      "clip_ratio/high_mean": 0.0004835813867885008,
      "clip_ratio/low_mean": 0.00032849010995050776,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008120714919641614,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3756.0,
      "completions/mean_length": 575.1138916015625,
      "completions/mean_terminated_length": 535.374755859375,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 5.970545348498105,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0186,
      "num_tokens": 371523114.0,
      "reward": 0.546875,
      "reward_std": 0.20847007632255554,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 639
    },
    {
      "clip_ratio/high_max": 0.0014115801695879782,
      "clip_ratio/high_mean": 0.0003921603085927927,
      "clip_ratio/low_mean": 0.00027514910800618964,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006673094191000928,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2793.0,
      "completions/mean_length": 570.0580444335938,
      "completions/mean_terminated_length": 518.147216796875,
      "completions/min_length": 59.0,
      "completions/min_terminated_length": 59.0,
      "epoch": 5.979877515310586,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": -0.0003,
      "num_tokens": 372060878.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.14884108304977417,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 640
    },
    {
      "clip_ratio/high_max": 0.001312239854996733,
      "clip_ratio/high_mean": 0.0003718040077274054,
      "clip_ratio/low_mean": 0.0003047645578817537,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006765685648133513,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3832.0,
      "completions/mean_length": 659.7600708007812,
      "completions/mean_terminated_length": 577.290283203125,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 5.989209682123068,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0055,
      "num_tokens": 372665975.0,
      "reward": 0.4888392984867096,
      "reward_std": 0.18994270265102386,
      "rewards/verify_math_reward/mean": 0.4888392984867096,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 641
    },
    {
      "clip_ratio/high_max": 0.0014908608982295846,
      "clip_ratio/high_mean": 0.0004632329918194955,
      "clip_ratio/low_mean": 0.0003131747744191671,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000776407773628307,
      "completions/clipped_ratio": 0.014204545454545414,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2240.0,
      "completions/mean_length": 628.64208984375,
      "completions/mean_terminated_length": 578.6801147460938,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 5.99854184893555,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": -0.0045,
      "num_tokens": 373226323.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.17995189130306244,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924396276473999,
      "step": 642
    },
    {
      "clip_ratio/high_max": 0.00173607175838697,
      "clip_ratio/high_mean": 0.0005547704724904179,
      "clip_ratio/low_mean": 0.00038455201729448163,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009393225045641884,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2927.0,
      "completions/mean_length": 620.4564819335938,
      "completions/mean_terminated_length": 573.2771606445312,
      "completions/min_length": 36.0,
      "completions/min_terminated_length": 36.0,
      "epoch": 6.009332166812482,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0021,
      "num_tokens": 373838044.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.23357053101062775,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763562679291,
      "step": 643
    },
    {
      "clip_ratio/high_max": 0.001706956718408037,
      "clip_ratio/high_mean": 0.000544264402378758,
      "clip_ratio/low_mean": 0.0003396990161945723,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008839634210744407,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2822.0,
      "completions/mean_length": 547.1674194335938,
      "completions/mean_terminated_length": 519.223876953125,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 6.0186643336249634,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.007,
      "num_tokens": 374377250.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.22146859765052795,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791125416755676,
      "step": 644
    },
    {
      "clip_ratio/high_max": 0.002222527713456657,
      "clip_ratio/high_mean": 0.0006332240118354093,
      "clip_ratio/low_mean": 0.000419498909423055,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010527229205763433,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2226.0,
      "completions/mean_length": 612.2745971679688,
      "completions/mean_terminated_length": 552.9603271484375,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 6.027996500437445,
      "grad_norm": 0.1435546875,
      "learning_rate": 1e-06,
      "loss": 0.0045,
      "num_tokens": 374955936.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.2486380934715271,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 645
    },
    {
      "clip_ratio/high_max": 0.0014041293006812339,
      "clip_ratio/high_mean": 0.000395850021050137,
      "clip_ratio/low_mean": 0.0002981900096301615,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006940400326129748,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3806.0,
      "completions/mean_length": 620.0714721679688,
      "completions/mean_terminated_length": 572.8869018554688,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 6.037328667249927,
      "grad_norm": 0.11376953125,
      "learning_rate": 1e-06,
      "loss": 0.0136,
      "num_tokens": 375563200.0,
      "reward": 0.4910714626312256,
      "reward_std": 0.19425876438617706,
      "rewards/verify_math_reward/mean": 0.4910714328289032,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 646
    },
    {
      "clip_ratio/high_max": 0.0017485125017628889,
      "clip_ratio/high_mean": 0.0004991767368665023,
      "clip_ratio/low_mean": 0.00040243194121103443,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009016086760311737,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4090.0,
      "completions/mean_length": 695.9922485351562,
      "completions/mean_terminated_length": 606.4158325195312,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 6.046660834062409,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0,
      "num_tokens": 376176937.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.24983063340187073,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 647
    },
    {
      "clip_ratio/high_max": 0.0018755322616925696,
      "clip_ratio/high_mean": 0.0005498871350937407,
      "clip_ratio/low_mean": 0.00027450664151729143,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008243937718361849,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3832.0,
      "completions/mean_length": 591.6350708007812,
      "completions/mean_terminated_length": 548.0780029296875,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 6.05599300087489,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0062,
      "num_tokens": 376749522.0,
      "reward": 0.5703125,
      "reward_std": 0.19666872918605804,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 648
    },
    {
      "clip_ratio/high_max": 0.001407445083714265,
      "clip_ratio/high_mean": 0.0004722145440609893,
      "clip_ratio/low_mean": 0.00032295044161401165,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000795164978626417,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3991.0,
      "completions/mean_length": 635.8616333007812,
      "completions/mean_terminated_length": 580.9387817382812,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 6.065325167687372,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.005,
      "num_tokens": 377349630.0,
      "reward": 0.5546875,
      "reward_std": 0.1961480975151062,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 649
    },
    {
      "clip_ratio/high_max": 0.0015356802682617854,
      "clip_ratio/high_mean": 0.0005339092857639116,
      "clip_ratio/low_mean": 0.00033801248764575575,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008719217780708277,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2643.0,
      "completions/mean_length": 575.2623291015625,
      "completions/mean_terminated_length": 527.469482421875,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 6.074657334499854,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.0091,
      "num_tokens": 377907249.0,
      "reward": 0.5625,
      "reward_std": 0.21241775155067444,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 650
    },
    {
      "clip_ratio/high_max": 0.0015866073154029436,
      "clip_ratio/high_mean": 0.0005240302166384936,
      "clip_ratio/low_mean": 0.00036624932818085654,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000890279525265214,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2850.0,
      "completions/mean_length": 552.8348388671875,
      "completions/mean_terminated_length": 520.9144287109375,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 6.083989501312336,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0043,
      "num_tokens": 378460813.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.213243305683136,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 651
    },
    {
      "clip_ratio/high_max": 0.001572258031956153,
      "clip_ratio/high_mean": 0.0004430160465744848,
      "clip_ratio/low_mean": 0.0002523147768442868,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006953308227366506,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3970.0,
      "completions/mean_length": 580.8504638671875,
      "completions/mean_terminated_length": 541.1760864257812,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 6.093321668124818,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0054,
      "num_tokens": 379028471.0,
      "reward": 0.5770089626312256,
      "reward_std": 0.19912005960941315,
      "rewards/verify_math_reward/mean": 0.5770089030265808,
      "rewards/verify_math_reward/std": 0.4943099319934845,
      "step": 652
    },
    {
      "clip_ratio/high_max": 0.001971721856534714,
      "clip_ratio/high_mean": 0.0006360389720612147,
      "clip_ratio/low_mean": 0.0003380997884505632,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009741387666508672,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2744.0,
      "completions/mean_length": 616.0111694335938,
      "completions/mean_terminated_length": 552.7386474609375,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 6.1026538349373,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0018,
      "num_tokens": 379605673.0,
      "reward": 0.5625,
      "reward_std": 0.21752658486366272,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 653
    },
    {
      "clip_ratio/high_max": 0.001424068503183662,
      "clip_ratio/high_mean": 0.0004155589747369959,
      "clip_ratio/low_mean": 0.00036105752406001557,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007766165081193321,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4081.0,
      "completions/mean_length": 630.1295166015625,
      "completions/mean_terminated_length": 594.9627685546875,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 6.111986001749782,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0105,
      "num_tokens": 380230621.0,
      "reward": 0.5078125,
      "reward_std": 0.23596911132335663,
      "rewards/verify_math_reward/mean": 0.5078125,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 654
    },
    {
      "clip_ratio/high_max": 0.0014793456921324832,
      "clip_ratio/high_mean": 0.00037749966213596053,
      "clip_ratio/low_mean": 0.0003206056824183179,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006981053547860938,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2218.0,
      "completions/mean_length": 577.046875,
      "completions/mean_terminated_length": 525.2389526367188,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 6.121318168562263,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0099,
      "num_tokens": 380776551.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.1774352639913559,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 655
    },
    {
      "clip_ratio/high_max": 0.001543802687592688,
      "clip_ratio/high_mean": 0.000473415589567594,
      "clip_ratio/low_mean": 0.00034356997582563054,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008169855509549961,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2219.0,
      "completions/mean_length": 570.9788208007812,
      "completions/mean_terminated_length": 523.1278686523438,
      "completions/min_length": 57.0,
      "completions/min_terminated_length": 57.0,
      "epoch": 6.130650335374745,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0049,
      "num_tokens": 381316228.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.2100103199481964,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 656
    },
    {
      "clip_ratio/high_max": 0.001508063778601354,
      "clip_ratio/high_mean": 0.0004405946004908401,
      "clip_ratio/low_mean": 0.0003125828362726679,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007531774517701706,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3474.0,
      "completions/mean_length": 593.1796875,
      "completions/mean_terminated_length": 565.5984497070312,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 6.139982502187227,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0225,
      "num_tokens": 381904045.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.22052742540836334,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 657
    },
    {
      "clip_ratio/high_max": 0.0016387092109653167,
      "clip_ratio/high_mean": 0.0004399012011617742,
      "clip_ratio/low_mean": 0.00039467282090299705,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008345740106960875,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3968.0,
      "completions/mean_length": 613.1127319335938,
      "completions/mean_terminated_length": 549.7874755859375,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 6.1493146689997085,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0001,
      "num_tokens": 382481666.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.21339528262615204,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 658
    },
    {
      "clip_ratio/high_max": 0.0015169033158599632,
      "clip_ratio/high_mean": 0.0004784027078130748,
      "clip_ratio/low_mean": 0.00028230874386281357,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007607114562233619,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2820.0,
      "completions/mean_length": 544.4375,
      "completions/mean_terminated_length": 516.4724731445312,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 6.15864683581219,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0035,
      "num_tokens": 383025882.0,
      "reward": 0.6328125,
      "reward_std": 0.1977197229862213,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 659
    },
    {
      "clip_ratio/high_max": 0.0015689903948441497,
      "clip_ratio/high_mean": 0.000494471459091983,
      "clip_ratio/low_mean": 0.000269647275558782,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007641187344233913,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3440.0,
      "completions/mean_length": 612.4654541015625,
      "completions/mean_terminated_length": 565.1776123046875,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 6.167979002624672,
      "grad_norm": 0.11474609375,
      "learning_rate": 1e-06,
      "loss": -0.0064,
      "num_tokens": 383612043.0,
      "reward": 0.582589328289032,
      "reward_std": 0.19389037787914276,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.493407279253006,
      "step": 660
    },
    {
      "clip_ratio/high_max": 0.0018581387485028245,
      "clip_ratio/high_mean": 0.000576645387127428,
      "clip_ratio/low_mean": 0.00036334505170998455,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009399904201927711,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3861.0,
      "completions/mean_length": 675.46875,
      "completions/mean_terminated_length": 601.3637084960938,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 6.177311169437154,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0068,
      "num_tokens": 384241719.0,
      "reward": 0.4508928656578064,
      "reward_std": 0.23011252284049988,
      "rewards/verify_math_reward/mean": 0.4508928656578064,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 661
    },
    {
      "clip_ratio/high_max": 0.0015810354198038112,
      "clip_ratio/high_mean": 0.0004957555406690517,
      "clip_ratio/low_mean": 0.0002787279472613591,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007744834929326316,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3741.0,
      "completions/mean_length": 588.1685791015625,
      "completions/mean_terminated_length": 540.5509033203125,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 6.186643336249635,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0036,
      "num_tokens": 384802494.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.20914226770401,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 662
    },
    {
      "clip_ratio/high_max": 0.0020061723298567813,
      "clip_ratio/high_mean": 0.0006206578104865912,
      "clip_ratio/low_mean": 0.00029738227794950944,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009180401002595318,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3680.0,
      "completions/mean_length": 572.9989013671875,
      "completions/mean_terminated_length": 525.1753540039062,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "epoch": 6.195975503062117,
      "grad_norm": 0.1484375,
      "learning_rate": 1e-06,
      "loss": 0.0183,
      "num_tokens": 385360165.0,
      "reward": 0.613839328289032,
      "reward_std": 0.22120577096939087,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 663
    },
    {
      "clip_ratio/high_max": 0.0014628641565650469,
      "clip_ratio/high_mean": 0.0004099119246347982,
      "clip_ratio/low_mean": 0.00031930648310662946,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007292184095604171,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1950.0,
      "completions/mean_length": 592.3471069335938,
      "completions/mean_terminated_length": 536.7335815429688,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 6.205307669874599,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0045,
      "num_tokens": 385914692.0,
      "reward": 0.5390625,
      "reward_std": 0.1865277737379074,
      "rewards/verify_math_reward/mean": 0.5390625,
      "rewards/verify_math_reward/std": 0.4987502098083496,
      "step": 664
    },
    {
      "clip_ratio/high_max": 0.0013592599443654763,
      "clip_ratio/high_mean": 0.0004112145575163595,
      "clip_ratio/low_mean": 0.0002543166864370505,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006655312358816445,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2683.0,
      "completions/mean_length": 626.7824096679688,
      "completions/mean_terminated_length": 563.7056884765625,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 6.2146398366870805,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": -0.0024,
      "num_tokens": 386509633.0,
      "reward": 0.4743303656578064,
      "reward_std": 0.1748419553041458,
      "rewards/verify_math_reward/mean": 0.4743303656578064,
      "rewards/verify_math_reward/std": 0.4996195137500763,
      "step": 665
    },
    {
      "clip_ratio/high_max": 0.0016358569173462456,
      "clip_ratio/high_mean": 0.0004455278162822651,
      "clip_ratio/low_mean": 0.0003062796452013572,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007518074326071655,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4042.0,
      "completions/mean_length": 543.4832763671875,
      "completions/mean_terminated_length": 523.5477294921875,
      "completions/min_length": 77.0,
      "completions/min_terminated_length": 77.0,
      "epoch": 6.223972003499562,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0032,
      "num_tokens": 387054490.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.19133350253105164,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161848425865173,
      "step": 666
    },
    {
      "clip_ratio/high_max": 0.0012536881449705106,
      "clip_ratio/high_mean": 0.0003850666768130395,
      "clip_ratio/low_mean": 0.00037460315843418357,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007596698401357571,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4060.0,
      "completions/mean_length": 630.3236694335938,
      "completions/mean_terminated_length": 595.158935546875,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 6.233304170312044,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": -0.0004,
      "num_tokens": 387664516.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.2067355215549469,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 667
    },
    {
      "clip_ratio/high_max": 0.001667837970671826,
      "clip_ratio/high_mean": 0.0004726103929897363,
      "clip_ratio/low_mean": 0.00034267061596438,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008152810041792691,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3206.0,
      "completions/mean_length": 637.919677734375,
      "completions/mean_terminated_length": 579.0420532226562,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 6.242636337124526,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0015,
      "num_tokens": 388270436.0,
      "reward": 0.5,
      "reward_std": 0.23582643270492554,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5002792477607727,
      "step": 668
    },
    {
      "clip_ratio/high_max": 0.0011765300996557926,
      "clip_ratio/high_mean": 0.000360934918262501,
      "clip_ratio/low_mean": 0.0002547097612932703,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006156446756904188,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3207.0,
      "completions/mean_length": 619.7176513671875,
      "completions/mean_terminated_length": 580.48193359375,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "epoch": 6.251968503937007,
      "grad_norm": 0.1171875,
      "learning_rate": 1e-06,
      "loss": 0.012,
      "num_tokens": 388876095.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.18869741261005402,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 669
    },
    {
      "clip_ratio/high_max": 0.0016365329174732324,
      "clip_ratio/high_mean": 0.00046390640022764273,
      "clip_ratio/low_mean": 0.00034075080952788994,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008046572138482588,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3929.0,
      "completions/mean_length": 610.5234375,
      "completions/mean_terminated_length": 543.11376953125,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 6.26130067074949,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0003,
      "num_tokens": 389443724.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.18336893618106842,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 670
    },
    {
      "clip_ratio/high_max": 0.0020880389129160903,
      "clip_ratio/high_mean": 0.0006353806134029583,
      "clip_ratio/low_mean": 0.0003999091197783855,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010352897297707386,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2489.0,
      "completions/mean_length": 580.3303833007812,
      "completions/mean_terminated_length": 524.5260620117188,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 6.270632837561972,
      "grad_norm": 0.1455078125,
      "learning_rate": 1e-06,
      "loss": 0.0017,
      "num_tokens": 389995396.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.24536260962486267,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 671
    },
    {
      "clip_ratio/high_max": 0.0013693723931282875,
      "clip_ratio/high_mean": 0.0004878874083260598,
      "clip_ratio/low_mean": 0.0002530723661493539,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000740959772883798,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3128.0,
      "completions/mean_length": 623.4129638671875,
      "completions/mean_terminated_length": 552.2210083007812,
      "completions/min_length": 43.0,
      "completions/min_terminated_length": 43.0,
      "epoch": 6.2799650043744535,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0076,
      "num_tokens": 390574550.0,
      "reward": 0.574776828289032,
      "reward_std": 0.20805639028549194,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 672
    },
    {
      "clip_ratio/high_max": 0.0016816371062304825,
      "clip_ratio/high_mean": 0.0004886971887572145,
      "clip_ratio/low_mean": 0.0003883276204987851,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008770248023211025,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3445.0,
      "completions/mean_length": 585.2176513671875,
      "completions/mean_terminated_length": 537.5599975585938,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 6.289297171186935,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0104,
      "num_tokens": 391149481.0,
      "reward": 0.543526828289032,
      "reward_std": 0.23563112318515778,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838003516197205,
      "step": 673
    },
    {
      "clip_ratio/high_max": 0.001866939315732452,
      "clip_ratio/high_mean": 0.0006465037920406758,
      "clip_ratio/low_mean": 0.0003531074702323167,
      "clip_ratio/low_min": 1.5024038475530688e-05,
      "clip_ratio/region_mean": 0.0009996112567023374,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3129.0,
      "completions/mean_length": 617.8192138671875,
      "completions/mean_terminated_length": 570.6041259765625,
      "completions/min_length": 79.0,
      "completions/min_terminated_length": 79.0,
      "epoch": 6.298629337999417,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0054,
      "num_tokens": 391740735.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.24319295585155487,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.49702703952789307,
      "step": 674
    },
    {
      "clip_ratio/high_max": 0.001925237715113326,
      "clip_ratio/high_mean": 0.0005865288562745263,
      "clip_ratio/low_mean": 0.0003072172133897766,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008937460552260745,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2857.0,
      "completions/mean_length": 651.921875,
      "completions/mean_terminated_length": 577.3067016601562,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 6.307961504811899,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0036,
      "num_tokens": 392322777.0,
      "reward": 0.5546875,
      "reward_std": 0.21854645013809204,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 675
    },
    {
      "clip_ratio/high_max": 0.00126882118274807,
      "clip_ratio/high_mean": 0.00040721619598116376,
      "clip_ratio/low_mean": 0.0003446972309575358,
      "clip_ratio/low_min": 5.852059985045344e-06,
      "clip_ratio/region_mean": 0.000751913428757689,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3996.0,
      "completions/mean_length": 663.4620971679688,
      "completions/mean_terminated_length": 608.9773559570312,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 6.31729367162438,
      "grad_norm": 0.11474609375,
      "learning_rate": 1e-06,
      "loss": 0.0136,
      "num_tokens": 392946999.0,
      "reward": 0.543526828289032,
      "reward_std": 0.22233189642429352,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 676
    },
    {
      "clip_ratio/high_max": 0.0015275381419996847,
      "clip_ratio/high_mean": 0.00044322943040242535,
      "clip_ratio/low_mean": 0.0003800341171427135,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008232635477725125,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4015.0,
      "completions/mean_length": 614.3080444335938,
      "completions/mean_terminated_length": 542.9293823242188,
      "completions/min_length": 82.0,
      "completions/min_terminated_length": 82.0,
      "epoch": 6.326625838436862,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0027,
      "num_tokens": 393515051.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.20718877017498016,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 677
    },
    {
      "clip_ratio/high_max": 0.001644826803385513,
      "clip_ratio/high_mean": 0.0005085031702947163,
      "clip_ratio/low_mean": 0.00031327084036547603,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008217740023610531,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2954.0,
      "completions/mean_length": 603.734375,
      "completions/mean_terminated_length": 548.3015747070312,
      "completions/min_length": 86.0,
      "completions/min_terminated_length": 86.0,
      "epoch": 6.335958005249344,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.013,
      "num_tokens": 394101701.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.21568436920642853,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 678
    },
    {
      "clip_ratio/high_max": 0.0017491900125605753,
      "clip_ratio/high_mean": 0.0005638512056975742,
      "clip_ratio/low_mean": 0.00034446125062004285,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009083124632525141,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3163.0,
      "completions/mean_length": 609.7890625,
      "completions/mean_terminated_length": 546.4033813476562,
      "completions/min_length": 72.0,
      "completions/min_terminated_length": 72.0,
      "epoch": 6.3452901720618256,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0008,
      "num_tokens": 394671032.0,
      "reward": 0.566964328289032,
      "reward_std": 0.22681200504302979,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 679
    },
    {
      "clip_ratio/high_max": 0.0015627999573553097,
      "clip_ratio/high_mean": 0.0004745071148590796,
      "clip_ratio/low_mean": 0.00038098836557765026,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008554954756618827,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3060.0,
      "completions/mean_length": 631.9799194335938,
      "completions/mean_terminated_length": 584.95703125,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 6.354622338874307,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": -0.0106,
      "num_tokens": 395273998.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.22552183270454407,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.49828118085861206,
      "step": 680
    },
    {
      "clip_ratio/high_max": 0.001402102237989311,
      "clip_ratio/high_mean": 0.0003810048119703424,
      "clip_ratio/low_mean": 0.000357528841277599,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00073853366120602,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3522.0,
      "completions/mean_length": 663.310302734375,
      "completions/mean_terminated_length": 604.8649291992188,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 6.363954505686789,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.009,
      "num_tokens": 395897684.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.22638945281505585,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 681
    },
    {
      "clip_ratio/high_max": 0.0015754902833577944,
      "clip_ratio/high_mean": 0.00041263157504545234,
      "clip_ratio/low_mean": 0.00030170024456310784,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007143318134694709,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2620.0,
      "completions/mean_length": 615.755615234375,
      "completions/mean_terminated_length": 560.5136108398438,
      "completions/min_length": 81.0,
      "completions/min_terminated_length": 81.0,
      "epoch": 6.373286672499271,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0004,
      "num_tokens": 396487401.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.20294009149074554,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 682
    },
    {
      "clip_ratio/high_max": 0.0015777024700582842,
      "clip_ratio/high_mean": 0.00047866311706457054,
      "clip_ratio/low_mean": 0.00033791923738135665,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008165823555827956,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1780.0,
      "completions/mean_length": 584.7924194335938,
      "completions/mean_terminated_length": 537.1289672851562,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 6.3826188393117524,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0009,
      "num_tokens": 397054823.0,
      "reward": 0.6015625,
      "reward_std": 0.20910878479480743,
      "rewards/verify_math_reward/mean": 0.6015625,
      "rewards/verify_math_reward/std": 0.48984986543655396,
      "step": 683
    },
    {
      "clip_ratio/high_max": 0.0014945502534828847,
      "clip_ratio/high_mean": 0.0004472033600677605,
      "clip_ratio/low_mean": 0.00036544585134379304,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008126492184601375,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4008.0,
      "completions/mean_length": 652.505615234375,
      "completions/mean_terminated_length": 573.886962890625,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 6.391951006124234,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0072,
      "num_tokens": 397647868.0,
      "reward": 0.4754464626312256,
      "reward_std": 0.21797555685043335,
      "rewards/verify_math_reward/mean": 0.4754464328289032,
      "rewards/verify_math_reward/std": 0.4996756613254547,
      "step": 684
    },
    {
      "clip_ratio/high_max": 0.0017288713715970516,
      "clip_ratio/high_mean": 0.0006311056781669322,
      "clip_ratio/low_mean": 0.00029005787143887574,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009211635442625266,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3996.0,
      "completions/mean_length": 588.9163208007812,
      "completions/mean_terminated_length": 545.325439453125,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 6.401283172936716,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0262,
      "num_tokens": 398222129.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.247055783867836,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 685
    },
    {
      "clip_ratio/high_max": 0.0014622002709074877,
      "clip_ratio/high_mean": 0.0004083467861164536,
      "clip_ratio/low_mean": 0.0002982504461215285,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007065972254167718,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3721.0,
      "completions/mean_length": 606.5647583007812,
      "completions/mean_terminated_length": 551.1768798828125,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 6.410615339749198,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0007,
      "num_tokens": 398795771.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.19272036850452423,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 686
    },
    {
      "clip_ratio/high_max": 0.001748134076478891,
      "clip_ratio/high_mean": 0.0005480914528561698,
      "clip_ratio/low_mean": 0.0004106713050759936,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009587627728251391,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3772.0,
      "completions/mean_length": 651.4989013671875,
      "completions/mean_terminated_length": 584.8816528320312,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 6.41994750656168,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": -0.0018,
      "num_tokens": 399394314.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.25216320157051086,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 687
    },
    {
      "clip_ratio/high_max": 0.0015942801092023728,
      "clip_ratio/high_mean": 0.0005259980257505958,
      "clip_ratio/low_mean": 0.00030949494271226285,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008354929623237695,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2747.0,
      "completions/mean_length": 665.8549194335938,
      "completions/mean_terminated_length": 583.5314331054688,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 6.429279673374162,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0156,
      "num_tokens": 399992456.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.22687868773937225,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756613254547,
      "step": 688
    },
    {
      "clip_ratio/high_max": 0.001243736903234094,
      "clip_ratio/high_mean": 0.0003723399368027458,
      "clip_ratio/low_mean": 0.00023856667121435748,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00061090661029084,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3889.0,
      "completions/mean_length": 636.7935791015625,
      "completions/mean_terminated_length": 589.8359985351562,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 6.438611840186644,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": -0.008,
      "num_tokens": 400604735.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.1880679875612259,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 689
    },
    {
      "clip_ratio/high_max": 0.0018221590062239557,
      "clip_ratio/high_mean": 0.0005158418440487367,
      "clip_ratio/low_mean": 0.0003384311767149484,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008542730292901979,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3536.0,
      "completions/mean_length": 620.3158569335938,
      "completions/mean_terminated_length": 565.146240234375,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 6.447944006999125,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0103,
      "num_tokens": 401188026.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.21542111039161682,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 690
    },
    {
      "clip_ratio/high_max": 0.0018276918799529085,
      "clip_ratio/high_mean": 0.0005538284267458948,
      "clip_ratio/low_mean": 0.0004197250436845934,
      "clip_ratio/low_min": 1.637840614421293e-05,
      "clip_ratio/region_mean": 0.0009735534704304882,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3412.0,
      "completions/mean_length": 654.6875,
      "completions/mean_terminated_length": 592.1181640625,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "epoch": 6.457276173811607,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": -0.0011,
      "num_tokens": 401797562.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.2566012442111969,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 691
    },
    {
      "clip_ratio/high_max": 0.00205546538472845,
      "clip_ratio/high_mean": 0.000640464154912479,
      "clip_ratio/low_mean": 0.0003406134549095441,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009810775991354603,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3196.0,
      "completions/mean_length": 571.5435791015625,
      "completions/mean_terminated_length": 539.7916870117188,
      "completions/min_length": 78.0,
      "completions/min_terminated_length": 78.0,
      "epoch": 6.466608340624089,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.006,
      "num_tokens": 402367185.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.23251745104789734,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796950817108154,
      "step": 692
    },
    {
      "clip_ratio/high_max": 0.0014482236711046426,
      "clip_ratio/high_mean": 0.0004982175551049295,
      "clip_ratio/low_mean": 0.0003383869577646692,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008366045176444459,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4052.0,
      "completions/mean_length": 623.8058471679688,
      "completions/mean_terminated_length": 556.6530151367188,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 6.475940507436571,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": -0.004,
      "num_tokens": 402938875.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.2563712000846863,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 693
    },
    {
      "clip_ratio/high_max": 0.0013574084459833102,
      "clip_ratio/high_mean": 0.0003852013112464192,
      "clip_ratio/low_mean": 0.0003038295905071209,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006890309043683374,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3993.0,
      "completions/mean_length": 570.4553833007812,
      "completions/mean_terminated_length": 522.5972900390625,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 6.485272674249052,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0017,
      "num_tokens": 403492419.0,
      "reward": 0.582589328289032,
      "reward_std": 0.17874544858932495,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.4934072494506836,
      "step": 694
    },
    {
      "clip_ratio/high_max": 0.0019339576683705673,
      "clip_ratio/high_mean": 0.0005122741404193221,
      "clip_ratio/low_mean": 0.0003889548996767189,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009012290420287172,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2496.0,
      "completions/mean_length": 621.0926513671875,
      "completions/mean_terminated_length": 545.8095703125,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 6.494604841061534,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0029,
      "num_tokens": 404058454.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.21395939588546753,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 695
    },
    {
      "clip_ratio/high_max": 0.0017154207966996182,
      "clip_ratio/high_mean": 0.0005345684314761456,
      "clip_ratio/low_mean": 0.00036346616070659366,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008980346010503126,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3987.0,
      "completions/mean_length": 637.8170166015625,
      "completions/mean_terminated_length": 578.9376220703125,
      "completions/min_length": 29.0,
      "completions/min_terminated_length": 29.0,
      "epoch": 6.503937007874016,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0041,
      "num_tokens": 404662090.0,
      "reward": 0.5011160969734192,
      "reward_std": 0.21782009303569794,
      "rewards/verify_math_reward/mean": 0.5011160969734192,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 696
    },
    {
      "clip_ratio/high_max": 0.0018197655481344555,
      "clip_ratio/high_mean": 0.0005400435502451728,
      "clip_ratio/low_mean": 0.0002911914510832503,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008312349955303944,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3057.0,
      "completions/mean_length": 646.8951416015625,
      "completions/mean_terminated_length": 572.1710205078125,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 6.5132691746864975,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": 0.0024,
      "num_tokens": 405260556.0,
      "reward": 0.527901828289032,
      "reward_std": 0.21196311712265015,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 697
    },
    {
      "clip_ratio/high_max": 0.0015164133819780545,
      "clip_ratio/high_mean": 0.0004549493255581183,
      "clip_ratio/low_mean": 0.00024323473064669088,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006981840529078909,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2078.0,
      "completions/mean_length": 619.0592041015625,
      "completions/mean_terminated_length": 563.86962890625,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 6.522601341498979,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0141,
      "num_tokens": 405857641.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.2067769169807434,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 698
    },
    {
      "clip_ratio/high_max": 0.0016490780781168723,
      "clip_ratio/high_mean": 0.00043405360872839083,
      "clip_ratio/low_mean": 0.0003074140852277196,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007414677061206021,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3062.0,
      "completions/mean_length": 571.9620971679688,
      "completions/mean_terminated_length": 516.0249633789062,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 6.531933508311461,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0022,
      "num_tokens": 406402503.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.1875341385602951,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 699
    },
    {
      "clip_ratio/high_max": 0.0014831389435130404,
      "clip_ratio/high_mean": 0.000444190265625366,
      "clip_ratio/low_mean": 0.0002342008714322219,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006783911376260221,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3494.0,
      "completions/mean_length": 555.341552734375,
      "completions/mean_terminated_length": 519.4159545898438,
      "completions/min_length": 33.0,
      "completions/min_terminated_length": 33.0,
      "epoch": 6.541265675123943,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": 0.0003,
      "num_tokens": 406943409.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.1910046637058258,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.4907552897930145,
      "step": 700
    },
    {
      "clip_ratio/high_max": 0.0014010896938998485,
      "clip_ratio/high_mean": 0.00037620840021190816,
      "clip_ratio/low_mean": 0.0002854122628832556,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006616206599119323,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2719.0,
      "completions/mean_length": 634.6205444335938,
      "completions/mean_terminated_length": 575.686767578125,
      "completions/min_length": 68.0,
      "completions/min_terminated_length": 68.0,
      "epoch": 6.550597841936424,
      "grad_norm": 0.11572265625,
      "learning_rate": 1e-06,
      "loss": -0.0017,
      "num_tokens": 407535725.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.17295697331428528,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 701
    },
    {
      "clip_ratio/high_max": 0.0016476720811624546,
      "clip_ratio/high_mean": 0.0004877237863638584,
      "clip_ratio/low_mean": 0.00032663168167346157,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008143554750859039,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3671.0,
      "completions/mean_length": 609.7288208007812,
      "completions/mean_terminated_length": 546.342041015625,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 6.559930008748906,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0018,
      "num_tokens": 408100810.0,
      "reward": 0.5625,
      "reward_std": 0.21564048528671265,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 702
    },
    {
      "clip_ratio/high_max": 0.0014412458749575308,
      "clip_ratio/high_mean": 0.0003959407438287599,
      "clip_ratio/low_mean": 0.0004448934485026257,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008408341946051223,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3537.0,
      "completions/mean_length": 674.734375,
      "completions/mean_terminated_length": 600.6134643554688,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 6.569262175561388,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": -0.0,
      "num_tokens": 408703892.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.21060903370380402,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982911348342896,
      "step": 703
    },
    {
      "clip_ratio/high_max": 0.0017365259636790142,
      "clip_ratio/high_mean": 0.0005543074890965727,
      "clip_ratio/low_mean": 0.00036931589875166537,
      "clip_ratio/low_min": 1.3748350284004118e-05,
      "clip_ratio/region_mean": 0.0009236233872798039,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3110.0,
      "completions/mean_length": 570.349365234375,
      "completions/mean_terminated_length": 530.5564575195312,
      "completions/min_length": 85.0,
      "completions/min_terminated_length": 85.0,
      "epoch": 6.57859434237387,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0092,
      "num_tokens": 409259517.0,
      "reward": 0.5770089626312256,
      "reward_std": 0.24539652466773987,
      "rewards/verify_math_reward/mean": 0.5770089030265808,
      "rewards/verify_math_reward/std": 0.4943099319934845,
      "step": 704
    },
    {
      "clip_ratio/high_max": 0.0015137692444113782,
      "clip_ratio/high_mean": 0.0004934037027624072,
      "clip_ratio/low_mean": 0.000305142387333035,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007985460842974135,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3367.0,
      "completions/mean_length": 627.0189819335938,
      "completions/mean_terminated_length": 579.9287719726562,
      "completions/min_length": 63.0,
      "completions/min_terminated_length": 63.0,
      "epoch": 6.587926509186351,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0033,
      "num_tokens": 409853334.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.22454431653022766,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 705
    },
    {
      "clip_ratio/high_max": 0.0016408362607762683,
      "clip_ratio/high_mean": 0.000524249555382994,
      "clip_ratio/low_mean": 0.00033646618055627187,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008607157360529527,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3904.0,
      "completions/mean_length": 629.6920166015625,
      "completions/mean_terminated_length": 578.6591186523438,
      "completions/min_length": 65.0,
      "completions/min_terminated_length": 65.0,
      "epoch": 6.597258675998834,
      "grad_norm": 0.1484375,
      "learning_rate": 1e-06,
      "loss": 0.0032,
      "num_tokens": 410457346.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.25070035457611084,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 706
    },
    {
      "clip_ratio/high_max": 0.0015540211488769273,
      "clip_ratio/high_mean": 0.00046608909974565904,
      "clip_ratio/low_mean": 0.00030339352542796405,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007694826294937229,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3779.0,
      "completions/mean_length": 591.3092041015625,
      "completions/mean_terminated_length": 547.748046875,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 6.606590842811316,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": -0.0006,
      "num_tokens": 411033495.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.1964409202337265,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 707
    },
    {
      "clip_ratio/high_max": 0.0014835074653092306,
      "clip_ratio/high_mean": 0.00046489866849697137,
      "clip_ratio/low_mean": 0.00039885721116661443,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008637558812552015,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3547.0,
      "completions/mean_length": 609.9877319335938,
      "completions/mean_terminated_length": 530.3983764648438,
      "completions/min_length": 51.0,
      "completions/min_terminated_length": 51.0,
      "epoch": 6.615923009623797,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0021,
      "num_tokens": 411588252.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.22094251215457916,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 708
    },
    {
      "clip_ratio/high_max": 0.0015491494705202058,
      "clip_ratio/high_mean": 0.0005491037081810646,
      "clip_ratio/low_mean": 0.00035680034864071786,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009059040594365797,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2603.0,
      "completions/mean_length": 605.4296875,
      "completions/mean_terminated_length": 562.0440673828125,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 6.625255176436279,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0009,
      "num_tokens": 412174957.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.21751701831817627,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796947836875916,
      "step": 709
    },
    {
      "clip_ratio/high_max": 0.00162834150614799,
      "clip_ratio/high_mean": 0.00046722792126274726,
      "clip_ratio/low_mean": 0.0003135976434123222,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007808255677446141,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2484.0,
      "completions/mean_length": 627.3136596679688,
      "completions/mean_terminated_length": 552.1653442382812,
      "completions/min_length": 59.0,
      "completions/min_terminated_length": 59.0,
      "epoch": 6.634587343248761,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": 0.001,
      "num_tokens": 412758998.0,
      "reward": 0.566964328289032,
      "reward_std": 0.20718877017498016,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 710
    },
    {
      "clip_ratio/high_max": 0.0015301951243600342,
      "clip_ratio/high_mean": 0.0004615695543179754,
      "clip_ratio/low_mean": 0.0002979102235940445,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007594797734782333,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3632.0,
      "completions/mean_length": 615.3370971679688,
      "completions/mean_terminated_length": 568.0882568359375,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 6.6439195100612425,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0021,
      "num_tokens": 413345044.0,
      "reward": 0.546875,
      "reward_std": 0.22237467765808105,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 711
    },
    {
      "clip_ratio/high_max": 0.0016038255198509432,
      "clip_ratio/high_mean": 0.00043382210117215436,
      "clip_ratio/low_mean": 0.0004151835817083338,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008490056723076123,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2339.0,
      "completions/mean_length": 612.5982666015625,
      "completions/mean_terminated_length": 537.131103515625,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 6.653251676873724,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0013,
      "num_tokens": 413912476.0,
      "reward": 0.4977678656578064,
      "reward_std": 0.21902543306350708,
      "rewards/verify_math_reward/mean": 0.4977678656578064,
      "rewards/verify_math_reward/std": 0.5002742409706116,
      "step": 712
    },
    {
      "clip_ratio/high_max": 0.0017356780263071414,
      "clip_ratio/high_mean": 0.0005065845016360981,
      "clip_ratio/low_mean": 0.00033234570923923457,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008389302265641163,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2958.0,
      "completions/mean_length": 592.5435791015625,
      "completions/mean_terminated_length": 544.9852905273438,
      "completions/min_length": 79.0,
      "completions/min_terminated_length": 79.0,
      "epoch": 6.662583843686206,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.002,
      "num_tokens": 414485259.0,
      "reward": 0.574776828289032,
      "reward_std": 0.20072515308856964,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 713
    },
    {
      "clip_ratio/high_max": 0.0015549954614471062,
      "clip_ratio/high_mean": 0.00040323632765648654,
      "clip_ratio/low_mean": 0.00026332150775942864,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006665578289357654,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4027.0,
      "completions/mean_length": 665.232177734375,
      "completions/mean_terminated_length": 578.8741455078125,
      "completions/min_length": 46.0,
      "completions/min_terminated_length": 46.0,
      "epoch": 6.671916010498688,
      "grad_norm": 0.1162109375,
      "learning_rate": 1e-06,
      "loss": 0.0033,
      "num_tokens": 415090307.0,
      "reward": 0.5234375,
      "reward_std": 0.16871507465839386,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 714
    },
    {
      "clip_ratio/high_max": 0.0016007302656362299,
      "clip_ratio/high_mean": 0.000484343542325405,
      "clip_ratio/low_mean": 0.0003593466458369221,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008436901816821774,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3986.0,
      "completions/mean_length": 640.3292846679688,
      "completions/mean_terminated_length": 565.4629516601562,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 6.681248177311169,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": -0.0023,
      "num_tokens": 415676954.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.20824116468429565,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 715
    },
    {
      "clip_ratio/high_max": 0.0015444225155079039,
      "clip_ratio/high_mean": 0.00040532365369472245,
      "clip_ratio/low_mean": 0.0003145333384964033,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007198569969659729,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4070.0,
      "completions/mean_length": 600.1082763671875,
      "completions/mean_terminated_length": 556.656494140625,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 6.690580344123651,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": -0.004,
      "num_tokens": 416260347.0,
      "reward": 0.5078125,
      "reward_std": 0.1994103640317917,
      "rewards/verify_math_reward/mean": 0.5078125,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 716
    },
    {
      "clip_ratio/high_max": 0.0017529315591673367,
      "clip_ratio/high_mean": 0.0005029141623253963,
      "clip_ratio/low_mean": 0.00026844842068385333,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007713625773249078,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3406.0,
      "completions/mean_length": 648.497802734375,
      "completions/mean_terminated_length": 593.7755126953125,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 6.699912510936133,
      "grad_norm": 0.11669921875,
      "learning_rate": 1e-06,
      "loss": -0.0055,
      "num_tokens": 416873065.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.20204602181911469,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 717
    },
    {
      "clip_ratio/high_max": 0.0016268120270979125,
      "clip_ratio/high_mean": 0.000511392492626328,
      "clip_ratio/low_mean": 0.0002864757314000599,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007978682178872987,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3151.0,
      "completions/mean_length": 599.5614013671875,
      "completions/mean_terminated_length": 564.0845336914062,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 6.7092446777486145,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0007,
      "num_tokens": 417457728.0,
      "reward": 0.582589328289032,
      "reward_std": 0.24017822742462158,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.493407279253006,
      "step": 718
    },
    {
      "clip_ratio/high_max": 0.001902307061754982,
      "clip_ratio/high_mean": 0.0006159242644798724,
      "clip_ratio/low_mean": 0.0003254028285937238,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009413270954610198,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2462.0,
      "completions/mean_length": 592.3136596679688,
      "completions/mean_terminated_length": 524.5517578125,
      "completions/min_length": 88.0,
      "completions/min_terminated_length": 88.0,
      "epoch": 6.718576844561096,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.02,
      "num_tokens": 418017689.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.23372025787830353,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689778685569763,
      "step": 719
    },
    {
      "clip_ratio/high_max": 0.0015699679061071947,
      "clip_ratio/high_mean": 0.0004537762915788335,
      "clip_ratio/low_mean": 0.0003439263783775459,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007977026662047138,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2795.0,
      "completions/mean_length": 652.4866333007812,
      "completions/mean_terminated_length": 565.8077392578125,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 6.727909011373578,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.0074,
      "num_tokens": 418601125.0,
      "reward": 0.527901828289032,
      "reward_std": 0.2062469869852066,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 720
    },
    {
      "clip_ratio/high_max": 0.0012845893561461708,
      "clip_ratio/high_mean": 0.0003507326888438911,
      "clip_ratio/low_mean": 0.0002174667658891849,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000568199451208784,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3118.0,
      "completions/mean_length": 615.109375,
      "completions/mean_terminated_length": 563.86181640625,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 6.73724117818606,
      "grad_norm": 0.1162109375,
      "learning_rate": 1e-06,
      "loss": -0.0012,
      "num_tokens": 419179935.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.1699492633342743,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 721
    },
    {
      "clip_ratio/high_max": 0.0013956112788946484,
      "clip_ratio/high_mean": 0.00044027766944054747,
      "clip_ratio/low_mean": 0.00026302494245555863,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007033026072349458,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3425.0,
      "completions/mean_length": 577.5714721679688,
      "completions/mean_terminated_length": 525.771240234375,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 6.746573344998541,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 419746967.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.19170865416526794,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 722
    },
    {
      "clip_ratio/high_max": 0.0014392837410923676,
      "clip_ratio/high_mean": 0.0004395788671445189,
      "clip_ratio/low_mean": 0.0002816496747755082,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007212285481728031,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3204.0,
      "completions/mean_length": 580.255615234375,
      "completions/mean_terminated_length": 532.5305786132812,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 6.755905511811024,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": 0.0065,
      "num_tokens": 420305660.0,
      "reward": 0.5546875,
      "reward_std": 0.1844968944787979,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 723
    },
    {
      "clip_ratio/high_max": 0.002062841680526617,
      "clip_ratio/high_mean": 0.0006532569404953392,
      "clip_ratio/low_mean": 0.00031706071627013444,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009703176574475947,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1602.0,
      "completions/mean_length": 554.2623291015625,
      "completions/mean_terminated_length": 530.3853759765625,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 6.765237678623506,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0006,
      "num_tokens": 420858735.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.23671838641166687,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791125416755676,
      "step": 724
    },
    {
      "clip_ratio/high_max": 0.0013924341101301252,
      "clip_ratio/high_mean": 0.00039767156886227895,
      "clip_ratio/low_mean": 0.0002900869521909044,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006877585174152046,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3799.0,
      "completions/mean_length": 629.2210083007812,
      "completions/mean_terminated_length": 578.1812133789062,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 6.7745698454359875,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.0155,
      "num_tokens": 421454917.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.19561494886875153,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 725
    },
    {
      "clip_ratio/high_max": 0.001639011170482263,
      "clip_ratio/high_mean": 0.0005354585969143955,
      "clip_ratio/low_mean": 0.00034351671774857095,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008789753064775141,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3800.0,
      "completions/mean_length": 628.3783569335938,
      "completions/mean_terminated_length": 581.3065795898438,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 6.783902012248469,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0,
      "num_tokens": 422056240.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.24310559034347534,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 726
    },
    {
      "clip_ratio/high_max": 0.0014604899488404044,
      "clip_ratio/high_mean": 0.0004352309608748328,
      "clip_ratio/low_mean": 0.00034727866113826167,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007825096436135937,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2662.0,
      "completions/mean_length": 620.208740234375,
      "completions/mean_terminated_length": 565.0374145507812,
      "completions/min_length": 13.0,
      "completions/min_terminated_length": 13.0,
      "epoch": 6.793234179060951,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0019,
      "num_tokens": 422645003.0,
      "reward": 0.504464328289032,
      "reward_std": 0.23085565865039825,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5002593398094177,
      "step": 727
    },
    {
      "clip_ratio/high_max": 0.0016713964269001735,
      "clip_ratio/high_mean": 0.0005271735949463618,
      "clip_ratio/low_mean": 0.00038745741767343134,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009146310057985829,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3814.0,
      "completions/mean_length": 653.46875,
      "completions/mean_terminated_length": 582.8929443359375,
      "completions/min_length": 82.0,
      "completions/min_terminated_length": 82.0,
      "epoch": 6.802566345873433,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0041,
      "num_tokens": 423240791.0,
      "reward": 0.4977678656578064,
      "reward_std": 0.22710509598255157,
      "rewards/verify_math_reward/mean": 0.4977678656578064,
      "rewards/verify_math_reward/std": 0.5002742409706116,
      "step": 728
    },
    {
      "clip_ratio/high_max": 0.0019422290079091908,
      "clip_ratio/high_mean": 0.0005786306226127635,
      "clip_ratio/low_mean": 0.0003068054797950026,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008854360980876663,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3729.0,
      "completions/mean_length": 651.4855346679688,
      "completions/mean_terminated_length": 580.8690185546875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 6.811898512685914,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0068,
      "num_tokens": 423832170.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.2062576562166214,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973994791507721,
      "step": 729
    },
    {
      "clip_ratio/high_max": 0.0014903579613019247,
      "clip_ratio/high_mean": 0.000479897981222166,
      "clip_ratio/low_mean": 0.000424975715304754,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009048736919794464,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3378.0,
      "completions/mean_length": 605.8236694335938,
      "completions/mean_terminated_length": 554.4393920898438,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 6.821230679498396,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": -0.0108,
      "num_tokens": 424398276.0,
      "reward": 0.5011160969734192,
      "reward_std": 0.22436249256134033,
      "rewards/verify_math_reward/mean": 0.5011160969734192,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 730
    },
    {
      "clip_ratio/high_max": 0.001639176145545207,
      "clip_ratio/high_mean": 0.0004777425956490333,
      "clip_ratio/low_mean": 0.00033155464973333437,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008092972320810077,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3994.0,
      "completions/mean_length": 620.1629638671875,
      "completions/mean_terminated_length": 572.9796752929688,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 6.830562846310878,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0008,
      "num_tokens": 424993590.0,
      "reward": 0.543526828289032,
      "reward_std": 0.19343462586402893,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 731
    },
    {
      "clip_ratio/high_max": 0.0014302250756372814,
      "clip_ratio/high_mean": 0.0003572983173398825,
      "clip_ratio/low_mean": 0.000372949505390352,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007302478170458926,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3233.0,
      "completions/mean_length": 681.872802734375,
      "completions/mean_terminated_length": 595.93359375,
      "completions/min_length": 85.0,
      "completions/min_terminated_length": 85.0,
      "epoch": 6.83989501312336,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": -0.0098,
      "num_tokens": 425605020.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.2198163866996765,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 732
    },
    {
      "clip_ratio/high_max": 0.0015339939745899756,
      "clip_ratio/high_mean": 0.00047710767648823094,
      "clip_ratio/low_mean": 0.00034324261071105866,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008203502979995392,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3768.0,
      "completions/mean_length": 598.1473388671875,
      "completions/mean_terminated_length": 562.6561279296875,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 6.849227179935841,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 426205184.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.2335616648197174,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763562679291,
      "step": 733
    },
    {
      "clip_ratio/high_max": 0.0014596762493965798,
      "clip_ratio/high_mean": 0.00039061952406882483,
      "clip_ratio/low_mean": 0.0002955498315486693,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006861693489099707,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4042.0,
      "completions/mean_length": 620.6942138671875,
      "completions/mean_terminated_length": 553.481201171875,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 6.858559346748323,
      "grad_norm": 0.11767578125,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 426780406.0,
      "reward": 0.5546875,
      "reward_std": 0.17281359434127808,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 734
    },
    {
      "clip_ratio/high_max": 0.0016730620045564137,
      "clip_ratio/high_mean": 0.00047316162499555503,
      "clip_ratio/low_mean": 0.0003248614663107219,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007980230843713798,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4043.0,
      "completions/mean_length": 626.6808471679688,
      "completions/mean_terminated_length": 571.6122436523438,
      "completions/min_length": 81.0,
      "completions/min_terminated_length": 81.0,
      "epoch": 6.867891513560805,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0181,
      "num_tokens": 427384352.0,
      "reward": 0.5546875,
      "reward_std": 0.20756922662258148,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 735
    },
    {
      "clip_ratio/high_max": 0.0015789661929375143,
      "clip_ratio/high_mean": 0.0004630958856068901,
      "clip_ratio/low_mean": 0.0003535834071044519,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000816679294075584,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3800.0,
      "completions/mean_length": 590.3381958007812,
      "completions/mean_terminated_length": 554.7677001953125,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 6.8772236803732865,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0135,
      "num_tokens": 427976567.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.20496748387813568,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 736
    },
    {
      "clip_ratio/high_max": 0.0017882666834339034,
      "clip_ratio/high_mean": 0.0005872034359981626,
      "clip_ratio/low_mean": 0.0003146516877450267,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009018551154440502,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3476.0,
      "completions/mean_length": 584.255615234375,
      "completions/mean_terminated_length": 536.5848388671875,
      "completions/min_length": 78.0,
      "completions/min_terminated_length": 78.0,
      "epoch": 6.886555847185768,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0059,
      "num_tokens": 428549924.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.2166614681482315,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 737
    },
    {
      "clip_ratio/high_max": 0.0013999576303831418,
      "clip_ratio/high_mean": 0.0004432034563706111,
      "clip_ratio/low_mean": 0.0003115712222552247,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007547746636191732,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3706.0,
      "completions/mean_length": 664.646240234375,
      "completions/mean_terminated_length": 594.299560546875,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 6.89588801399825,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0138,
      "num_tokens": 429165791.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.20644131302833557,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 738
    },
    {
      "clip_ratio/high_max": 0.001364655442557705,
      "clip_ratio/high_mean": 0.00037603266787300527,
      "clip_ratio/low_mean": 0.0002650274038842326,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006410600663002697,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3938.0,
      "completions/mean_length": 599.0748291015625,
      "completions/mean_terminated_length": 567.5709838867188,
      "completions/min_length": 41.0,
      "completions/min_terminated_length": 41.0,
      "epoch": 6.905220180810732,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0014,
      "num_tokens": 429750978.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.16927777230739594,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 739
    },
    {
      "clip_ratio/high_max": 0.0013580989325419068,
      "clip_ratio/high_mean": 0.0003983628373589454,
      "clip_ratio/low_mean": 0.00028498169774593407,
      "clip_ratio/low_min": 6.322071840259014e-06,
      "clip_ratio/region_mean": 0.0006833445477241185,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3680.0,
      "completions/mean_length": 643.421875,
      "completions/mean_terminated_length": 576.6484375,
      "completions/min_length": 25.0,
      "completions/min_terminated_length": 25.0,
      "epoch": 6.914552347623214,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0061,
      "num_tokens": 430352252.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.18611155450344086,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 740
    },
    {
      "clip_ratio/high_max": 0.0017543586300234892,
      "clip_ratio/high_mean": 0.0005219036647758912,
      "clip_ratio/low_mean": 0.000338342642862699,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008602463058196008,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3953.0,
      "completions/mean_length": 546.8951416015625,
      "completions/mean_terminated_length": 530.9798583984375,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 6.923884514435695,
      "grad_norm": 0.1435546875,
      "learning_rate": 1e-06,
      "loss": 0.0093,
      "num_tokens": 430906374.0,
      "reward": 0.590401828289032,
      "reward_std": 0.22608637809753418,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 741
    },
    {
      "clip_ratio/high_max": 0.0015941227948133019,
      "clip_ratio/high_mean": 0.0004207086522001191,
      "clip_ratio/low_mean": 0.0003866800755076838,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008073887197497243,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4079.0,
      "completions/mean_length": 597.2689819335938,
      "completions/mean_terminated_length": 541.7335815429688,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 6.933216681248178,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.004,
      "num_tokens": 431468687.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.1962568461894989,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 742
    },
    {
      "clip_ratio/high_max": 0.0014094443440626492,
      "clip_ratio/high_mean": 0.00040140482110473386,
      "clip_ratio/low_mean": 0.0002468744604584572,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006482792828137462,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2361.0,
      "completions/mean_length": 540.9788208007812,
      "completions/mean_terminated_length": 525.0370483398438,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 6.942548848060659,
      "grad_norm": 0.10986328125,
      "learning_rate": 1e-06,
      "loss": 0.0016,
      "num_tokens": 432031988.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.16833347082138062,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 743
    },
    {
      "clip_ratio/high_max": 0.0016836990744195646,
      "clip_ratio/high_mean": 0.000504533958974207,
      "clip_ratio/low_mean": 0.0003987863572092465,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009033203023136593,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3824.0,
      "completions/mean_length": 659.8370971679688,
      "completions/mean_terminated_length": 581.3858032226562,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 6.951881014873141,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0162,
      "num_tokens": 432627586.0,
      "reward": 0.4988839626312256,
      "reward_std": 0.2198163866996765,
      "rewards/verify_math_reward/mean": 0.4988839328289032,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 744
    },
    {
      "clip_ratio/high_max": 0.0018406853705528192,
      "clip_ratio/high_mean": 0.0005424522314569913,
      "clip_ratio/low_mean": 0.00038954928982093406,
      "clip_ratio/low_min": 7.519249265897088e-06,
      "clip_ratio/region_mean": 0.0009320015251432778,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3735.0,
      "completions/mean_length": 635.1417846679688,
      "completions/mean_terminated_length": 576.216796875,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 6.961213181685623,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": -0.0021,
      "num_tokens": 433225793.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.21830865740776062,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 745
    },
    {
      "clip_ratio/high_max": 0.0019208669509680476,
      "clip_ratio/high_mean": 0.000601478594035143,
      "clip_ratio/low_mean": 0.0003656832822116485,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009671619141045085,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3292.0,
      "completions/mean_length": 602.5234375,
      "completions/mean_terminated_length": 551.090576171875,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 6.970545348498105,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0047,
      "num_tokens": 433803326.0,
      "reward": 0.543526828289032,
      "reward_std": 0.23976704478263855,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 746
    },
    {
      "clip_ratio/high_max": 0.0015970139256751281,
      "clip_ratio/high_mean": 0.00045741989526959514,
      "clip_ratio/low_mean": 0.0003967752943481173,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008541951947336202,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2383.0,
      "completions/mean_length": 594.7545166015625,
      "completions/mean_terminated_length": 547.2262573242188,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "epoch": 6.979877515310586,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0001,
      "num_tokens": 434373586.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.20132221281528473,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 747
    },
    {
      "clip_ratio/high_max": 0.0018936878550448455,
      "clip_ratio/high_mean": 0.0005422328524673503,
      "clip_ratio/low_mean": 0.0002730562445663054,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008152890968631255,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3853.0,
      "completions/mean_length": 613.2913208007812,
      "completions/mean_terminated_length": 537.8392333984375,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 6.989209682123068,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 434925647.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.19535604119300842,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 748
    },
    {
      "clip_ratio/high_max": 0.0013596016688097734,
      "clip_ratio/high_mean": 0.00037905835620222206,
      "clip_ratio/low_mean": 0.00038117095652978605,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000760229314892058,
      "completions/clipped_ratio": 0.005681818181818232,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4028.0,
      "completions/mean_length": 575.4716186523438,
      "completions/mean_terminated_length": 555.3543090820312,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 6.99854184893555,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0004,
      "num_tokens": 435517554.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.19948776066303253,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 749
    },
    {
      "clip_ratio/high_max": 0.0014337654092742014,
      "clip_ratio/high_mean": 0.00040704050911699596,
      "clip_ratio/low_mean": 0.0005006092478652135,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009076497672140249,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2166.0,
      "completions/mean_length": 578.4486694335938,
      "completions/mean_terminated_length": 550.7514038085938,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 7.009332166812482,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0055,
      "num_tokens": 436103060.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.2098594754934311,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 750
    },
    {
      "clip_ratio/high_max": 0.0016529791428183671,
      "clip_ratio/high_mean": 0.0004881096972439991,
      "clip_ratio/low_mean": 0.0002503577248944566,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007384674263448687,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3644.0,
      "completions/mean_length": 584.1328125,
      "completions/mean_terminated_length": 540.4824829101562,
      "completions/min_length": 62.0,
      "completions/min_terminated_length": 62.0,
      "epoch": 7.0186643336249634,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0108,
      "num_tokens": 436672571.0,
      "reward": 0.590401828289032,
      "reward_std": 0.20238234102725983,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 751
    },
    {
      "clip_ratio/high_max": 0.0016284942066704389,
      "clip_ratio/high_mean": 0.00045171342856065166,
      "clip_ratio/low_mean": 0.00042511201991146663,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008768254574533785,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3379.0,
      "completions/mean_length": 607.5379638671875,
      "completions/mean_terminated_length": 556.1788940429688,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 7.027996500437445,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": -0.0058,
      "num_tokens": 437247989.0,
      "reward": 0.5546875,
      "reward_std": 0.22462210059165955,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 752
    },
    {
      "clip_ratio/high_max": 0.0018130133648810443,
      "clip_ratio/high_mean": 0.0005223414323154429,
      "clip_ratio/low_mean": 0.0003100730389178352,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008324144719153992,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3473.0,
      "completions/mean_length": 620.0045166015625,
      "completions/mean_terminated_length": 588.689208984375,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 7.037328667249927,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 437856401.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.21970760822296143,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 753
    },
    {
      "clip_ratio/high_max": 0.0014793316986470018,
      "clip_ratio/high_mean": 0.00046869984316799673,
      "clip_ratio/low_mean": 0.0003304205778249525,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007991204211066361,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2394.0,
      "completions/mean_length": 624.075927734375,
      "completions/mean_terminated_length": 540.7496948242188,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 7.046660834062409,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0113,
      "num_tokens": 438416461.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.19422808289527893,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 754
    },
    {
      "clip_ratio/high_max": 0.0014441558842008817,
      "clip_ratio/high_mean": 0.0004057947363662606,
      "clip_ratio/low_mean": 0.00032311120219219447,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007289059467439074,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2099.0,
      "completions/mean_length": 617.875,
      "completions/mean_terminated_length": 570.66064453125,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 7.05599300087489,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0098,
      "num_tokens": 439021605.0,
      "reward": 0.546875,
      "reward_std": 0.2086530178785324,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 755
    },
    {
      "clip_ratio/high_max": 0.001188702220133564,
      "clip_ratio/high_mean": 0.0003310770450752898,
      "clip_ratio/low_mean": 0.00032470773794557317,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006557847978001519,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4087.0,
      "completions/mean_length": 646.9676513671875,
      "completions/mean_terminated_length": 584.2579345703125,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 7.065325167687372,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.0001,
      "num_tokens": 439624824.0,
      "reward": 0.5625,
      "reward_std": 0.19361938536167145,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 756
    },
    {
      "clip_ratio/high_max": 0.0017265345031773904,
      "clip_ratio/high_mean": 0.0005095569831610192,
      "clip_ratio/low_mean": 0.0002730259932377521,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007825829634384718,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3326.0,
      "completions/mean_length": 564.208740234375,
      "completions/mean_terminated_length": 532.3908081054688,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 7.074657334499854,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.0015,
      "num_tokens": 440184827.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.18960639834403992,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 757
    },
    {
      "clip_ratio/high_max": 0.001866878319560783,
      "clip_ratio/high_mean": 0.0005475571178976679,
      "clip_ratio/low_mean": 0.0004235813817103917,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009711385109767434,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3874.0,
      "completions/mean_length": 613.9129638671875,
      "completions/mean_terminated_length": 550.6022338867188,
      "completions/min_length": 76.0,
      "completions/min_terminated_length": 76.0,
      "epoch": 7.083989501312336,
      "grad_norm": 0.1484375,
      "learning_rate": 1e-06,
      "loss": -0.006,
      "num_tokens": 440757349.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.2717750668525696,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 758
    },
    {
      "clip_ratio/high_max": 0.0018774439668050036,
      "clip_ratio/high_mean": 0.0006037406417362945,
      "clip_ratio/low_mean": 0.00038811334184174484,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009918539881255128,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3606.0,
      "completions/mean_length": 592.1763916015625,
      "completions/mean_terminated_length": 532.5198974609375,
      "completions/min_length": 93.0,
      "completions/min_terminated_length": 93.0,
      "epoch": 7.093321668124818,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0027,
      "num_tokens": 441318699.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.22300560772418976,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 759
    },
    {
      "clip_ratio/high_max": 0.0019466903140710201,
      "clip_ratio/high_mean": 0.0005515515213119215,
      "clip_ratio/low_mean": 0.0003898823424606235,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009414338574060821,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3343.0,
      "completions/mean_length": 599.1027221679688,
      "completions/mean_terminated_length": 571.5680541992188,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 7.1026538349373,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.004,
      "num_tokens": 441918743.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.20474882423877716,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 760
    },
    {
      "clip_ratio/high_max": 0.0017717557166179176,
      "clip_ratio/high_mean": 0.000545461072647413,
      "clip_ratio/low_mean": 0.000388486207157257,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000933947267185431,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2396.0,
      "completions/mean_length": 605.9765625,
      "completions/mean_terminated_length": 542.5215454101562,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 7.111986001749782,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0163,
      "num_tokens": 442478890.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.23124006390571594,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 761
    },
    {
      "clip_ratio/high_max": 0.0013604059067802154,
      "clip_ratio/high_mean": 0.0004352437889565408,
      "clip_ratio/low_mean": 0.0003454785612575506,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000780722353283636,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3919.0,
      "completions/mean_length": 667.7265625,
      "completions/mean_terminated_length": 601.4232177734375,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 7.121318168562263,
      "grad_norm": 0.11865234375,
      "learning_rate": 1e-06,
      "loss": 0.0092,
      "num_tokens": 443097885.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.19700251519680023,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 762
    },
    {
      "clip_ratio/high_max": 0.0014831846856395714,
      "clip_ratio/high_mean": 0.0003804415205195255,
      "clip_ratio/low_mean": 0.0004031428738926479,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007835843871362158,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3097.0,
      "completions/mean_length": 609.3080444335938,
      "completions/mean_terminated_length": 545.9136352539062,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 7.130650335374745,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.0103,
      "num_tokens": 443673041.0,
      "reward": 0.504464328289032,
      "reward_std": 0.22262795269489288,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5002593398094177,
      "step": 763
    },
    {
      "clip_ratio/high_max": 0.001707993393210927,
      "clip_ratio/high_mean": 0.00048011166109063197,
      "clip_ratio/low_mean": 0.00033218250530353544,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008122941662804806,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3149.0,
      "completions/mean_length": 603.8671875,
      "completions/mean_terminated_length": 560.462158203125,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 7.139982502187227,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": 0.0016,
      "num_tokens": 444249250.0,
      "reward": 0.578125,
      "reward_std": 0.1947498470544815,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 764
    },
    {
      "clip_ratio/high_max": 0.0014983212004153756,
      "clip_ratio/high_mean": 0.00043512692445801804,
      "clip_ratio/low_mean": 0.000351197007034898,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000786323927968624,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3946.0,
      "completions/mean_length": 653.546875,
      "completions/mean_terminated_length": 582.9727172851562,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 7.1493146689997085,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0108,
      "num_tokens": 444860572.0,
      "reward": 0.527901828289032,
      "reward_std": 0.2144797444343567,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 765
    },
    {
      "clip_ratio/high_max": 0.0017015719322444056,
      "clip_ratio/high_mean": 0.0005034475834690966,
      "clip_ratio/low_mean": 0.00027002869319403544,
      "clip_ratio/low_min": 1.3130252227711026e-05,
      "clip_ratio/region_mean": 0.0007734762771178794,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2724.0,
      "completions/mean_length": 632.859375,
      "completions/mean_terminated_length": 545.6864624023438,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 7.15864683581219,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0029,
      "num_tokens": 445420454.0,
      "reward": 0.574776828289032,
      "reward_std": 0.20835062861442566,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 766
    },
    {
      "clip_ratio/high_max": 0.0014358019634528318,
      "clip_ratio/high_mean": 0.00041219516538149037,
      "clip_ratio/low_mean": 0.00035338151928954176,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007655766912648687,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3487.0,
      "completions/mean_length": 632.9006958007812,
      "completions/mean_terminated_length": 581.9150390625,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 7.167979002624672,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0155,
      "num_tokens": 446027669.0,
      "reward": 0.4921875298023224,
      "reward_std": 0.21943660080432892,
      "rewards/verify_math_reward/mean": 0.4921875,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 767
    },
    {
      "clip_ratio/high_max": 0.0017051438917405903,
      "clip_ratio/high_mean": 0.0005269981031688076,
      "clip_ratio/low_mean": 0.0003154945430878797,
      "clip_ratio/low_min": 1.4070238648855593e-05,
      "clip_ratio/region_mean": 0.000842492652736837,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2211.0,
      "completions/mean_length": 593.2332763671875,
      "completions/mean_terminated_length": 549.696044921875,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 7.177311169437154,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0006,
      "num_tokens": 446600230.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.2086516171693802,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 768
    },
    {
      "clip_ratio/high_max": 0.0013604813557321904,
      "clip_ratio/high_mean": 0.00041439705387347203,
      "clip_ratio/low_mean": 0.0003031894350442599,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007175864857345005,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2965.0,
      "completions/mean_length": 597.5982666015625,
      "completions/mean_terminated_length": 550.108642578125,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 7.186643336249635,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.0081,
      "num_tokens": 447174566.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.21301396191120148,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 769
    },
    {
      "clip_ratio/high_max": 0.0016332325531038805,
      "clip_ratio/high_mean": 0.000469606401566125,
      "clip_ratio/low_mean": 0.00035437491192169546,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008239813132604468,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3869.0,
      "completions/mean_length": 621.4207763671875,
      "completions/mean_terminated_length": 562.26220703125,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 7.195975503062117,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0138,
      "num_tokens": 447762439.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.1998663991689682,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 770
    },
    {
      "clip_ratio/high_max": 0.0015349312843682128,
      "clip_ratio/high_mean": 0.0004354138745839009,
      "clip_ratio/low_mean": 0.00032673705868546676,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007621509257660364,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4047.0,
      "completions/mean_length": 625.669677734375,
      "completions/mean_terminated_length": 586.5011596679688,
      "completions/min_length": 82.0,
      "completions/min_terminated_length": 82.0,
      "epoch": 7.205307669874599,
      "grad_norm": 0.1142578125,
      "learning_rate": 1e-06,
      "loss": 0.0007,
      "num_tokens": 448367511.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.20842549204826355,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 771
    },
    {
      "clip_ratio/high_max": 0.0017207709597641951,
      "clip_ratio/high_mean": 0.0005706156265432583,
      "clip_ratio/low_mean": 0.00045667639528801374,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010272920326315216,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3449.0,
      "completions/mean_length": 612.0145263671875,
      "completions/mean_terminated_length": 576.6640014648438,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 7.2146398366870805,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0129,
      "num_tokens": 448972404.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.21751701831817627,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321892857551575,
      "step": 772
    },
    {
      "clip_ratio/high_max": 0.0015187498365776264,
      "clip_ratio/high_mean": 0.00037981771674822085,
      "clip_ratio/low_mean": 0.0003381844456953331,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000718002160283504,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2319.0,
      "completions/mean_length": 558.6529541015625,
      "completions/mean_terminated_length": 510.6346435546875,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 7.223972003499562,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 449518685.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.1911466419696808,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 773
    },
    {
      "clip_ratio/high_max": 0.0015405614449264249,
      "clip_ratio/high_mean": 0.00047592004079888284,
      "clip_ratio/low_mean": 0.00040787546913634287,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008837955238050199,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3259.0,
      "completions/mean_length": 583.0491333007812,
      "completions/mean_terminated_length": 539.3853149414062,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 7.233304170312044,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": -0.0006,
      "num_tokens": 450087857.0,
      "reward": 0.5256696939468384,
      "reward_std": 0.2395801991224289,
      "rewards/verify_math_reward/mean": 0.5256696343421936,
      "rewards/verify_math_reward/std": 0.4996195137500763,
      "step": 774
    },
    {
      "clip_ratio/high_max": 0.001768720663676504,
      "clip_ratio/high_mean": 0.0005720045446651056,
      "clip_ratio/low_mean": 0.00032422032188605954,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000896224875759799,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3860.0,
      "completions/mean_length": 672.036865234375,
      "completions/mean_terminated_length": 613.7401123046875,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 7.242636337124526,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0145,
      "num_tokens": 450719850.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.22199852764606476,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448275566101074,
      "step": 775
    },
    {
      "clip_ratio/high_max": 0.0014075522412895225,
      "clip_ratio/high_mean": 0.0003883136796503095,
      "clip_ratio/low_mean": 0.00035391475341839396,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007422284356835007,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3844.0,
      "completions/mean_length": 656.575927734375,
      "completions/mean_terminated_length": 586.0637817382812,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 7.251968503937007,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.006,
      "num_tokens": 451320398.0,
      "reward": 0.566964328289032,
      "reward_std": 0.21271198987960815,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 776
    },
    {
      "clip_ratio/high_max": 0.0017032995419867802,
      "clip_ratio/high_mean": 0.0004951114142386359,
      "clip_ratio/low_mean": 0.00035783809050826676,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008529495089533157,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2585.0,
      "completions/mean_length": 572.6171875,
      "completions/mean_terminated_length": 524.7885131835938,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 7.26130067074949,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0051,
      "num_tokens": 451866575.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.20388300716876984,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 777
    },
    {
      "clip_ratio/high_max": 0.0016962697645794833,
      "clip_ratio/high_mean": 0.0005330779235919181,
      "clip_ratio/low_mean": 0.00035467849852466315,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008877564205249655,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1960.0,
      "completions/mean_length": 625.5814819335938,
      "completions/mean_terminated_length": 558.4630126953125,
      "completions/min_length": 76.0,
      "completions/min_terminated_length": 76.0,
      "epoch": 7.270632837561972,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": -0.0035,
      "num_tokens": 452442936.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.23334047198295593,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689781665802,
      "step": 778
    },
    {
      "clip_ratio/high_max": 0.0016587730769970221,
      "clip_ratio/high_mean": 0.0004879003981841379,
      "clip_ratio/low_mean": 0.00035009510349937045,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008379955088457791,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3254.0,
      "completions/mean_length": 680.1529541015625,
      "completions/mean_terminated_length": 606.1493530273438,
      "completions/min_length": 185.0,
      "completions/min_terminated_length": 185.0,
      "epoch": 7.2799650043744535,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": -0.0053,
      "num_tokens": 453067065.0,
      "reward": 0.4888392984867096,
      "reward_std": 0.2131224423646927,
      "rewards/verify_math_reward/mean": 0.4888392984867096,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 779
    },
    {
      "clip_ratio/high_max": 0.0017931463626155164,
      "clip_ratio/high_mean": 0.0005471243384818081,
      "clip_ratio/low_mean": 0.00044033032418155926,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009874546667560935,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1901.0,
      "completions/mean_length": 599.9408569335938,
      "completions/mean_terminated_length": 528.2677001953125,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 7.289297171186935,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0168,
      "num_tokens": 453619108.0,
      "reward": 0.535714328289032,
      "reward_std": 0.2287980318069458,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 780
    },
    {
      "clip_ratio/high_max": 0.0018340009501116583,
      "clip_ratio/high_mean": 0.0005197865559694037,
      "clip_ratio/low_mean": 0.00035022596580347454,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00087001253177732,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2557.0,
      "completions/mean_length": 591.4542846679688,
      "completions/mean_terminated_length": 543.8812255859375,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 7.298629337999417,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0074,
      "num_tokens": 454182203.0,
      "reward": 0.598214328289032,
      "reward_std": 0.21177834272384644,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 781
    },
    {
      "clip_ratio/high_max": 0.0014043086439414765,
      "clip_ratio/high_mean": 0.000403375347104884,
      "clip_ratio/low_mean": 0.0004151679099777539,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008185432534446591,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3706.0,
      "completions/mean_length": 634.075927734375,
      "completions/mean_terminated_length": 583.1075439453125,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 7.307961504811899,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0089,
      "num_tokens": 454790495.0,
      "reward": 0.4776785969734192,
      "reward_std": 0.23401561379432678,
      "rewards/verify_math_reward/mean": 0.4776785671710968,
      "rewards/verify_math_reward/std": 0.4997805058956146,
      "step": 782
    },
    {
      "clip_ratio/high_max": 0.0015813286304364738,
      "clip_ratio/high_mean": 0.00044801169804031815,
      "clip_ratio/low_mean": 0.0002970719417589862,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007450836478710698,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3505.0,
      "completions/mean_length": 622.0848388671875,
      "completions/mean_terminated_length": 570.93994140625,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 7.31729367162438,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.016,
      "num_tokens": 455379979.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.19238406419754028,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689781665802,
      "step": 783
    },
    {
      "clip_ratio/high_max": 0.001472742427722551,
      "clip_ratio/high_mean": 0.00048223072303699155,
      "clip_ratio/low_mean": 0.0003173981750705934,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007996289045877347,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3856.0,
      "completions/mean_length": 605.130615234375,
      "completions/mean_terminated_length": 533.5637817382812,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 7.326625838436862,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0162,
      "num_tokens": 455938392.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.19986683130264282,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161848425865173,
      "step": 784
    },
    {
      "clip_ratio/high_max": 0.0015499775236094138,
      "clip_ratio/high_mean": 0.00046867967193975346,
      "clip_ratio/low_mean": 0.00030276298502940335,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007714426546954201,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2070.0,
      "completions/mean_length": 566.927490234375,
      "completions/mean_terminated_length": 543.135986328125,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 7.335958005249344,
      "grad_norm": 0.115234375,
      "learning_rate": 1e-06,
      "loss": -0.0012,
      "num_tokens": 456503423.0,
      "reward": 0.582589328289032,
      "reward_std": 0.19365398585796356,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.493407279253006,
      "step": 785
    },
    {
      "clip_ratio/high_max": 0.001369633419017191,
      "clip_ratio/high_mean": 0.0004456426138403913,
      "clip_ratio/low_mean": 0.00035350752568774624,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007991501388460165,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3906.0,
      "completions/mean_length": 588.0223388671875,
      "completions/mean_terminated_length": 556.4189453125,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 7.3452901720618256,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0054,
      "num_tokens": 457087507.0,
      "reward": 0.5011160969734192,
      "reward_std": 0.19219790399074554,
      "rewards/verify_math_reward/mean": 0.5011160969734192,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 786
    },
    {
      "clip_ratio/high_max": 0.0013568450876846327,
      "clip_ratio/high_mean": 0.0004112453474363065,
      "clip_ratio/low_mean": 0.00037408362936730555,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007853289989725454,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3739.0,
      "completions/mean_length": 626.2444458007812,
      "completions/mean_terminated_length": 575.1608276367188,
      "completions/min_length": 94.0,
      "completions/min_terminated_length": 94.0,
      "epoch": 7.354622338874307,
      "grad_norm": 0.11865234375,
      "learning_rate": 1e-06,
      "loss": 0.0184,
      "num_tokens": 457675630.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.20520275831222534,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 787
    },
    {
      "clip_ratio/high_max": 0.0014830811724095838,
      "clip_ratio/high_mean": 0.00043378133682381304,
      "clip_ratio/low_mean": 0.0003535696736207683,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007873510039644316,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4031.0,
      "completions/mean_length": 621.7857666015625,
      "completions/mean_terminated_length": 586.5343627929688,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 7.363954505686789,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": 0.0101,
      "num_tokens": 458291454.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.18915246427059174,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 788
    },
    {
      "clip_ratio/high_max": 0.0015857654088904383,
      "clip_ratio/high_mean": 0.00044632711296799243,
      "clip_ratio/low_mean": 0.0003439245534764268,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007902516708782059,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4046.0,
      "completions/mean_length": 634.1730346679688,
      "completions/mean_terminated_length": 571.2306518554688,
      "completions/min_length": 41.0,
      "completions/min_terminated_length": 41.0,
      "epoch": 7.373286672499271,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.01,
      "num_tokens": 458879977.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.20166738331317902,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 789
    },
    {
      "clip_ratio/high_max": 0.00176456842564221,
      "clip_ratio/high_mean": 0.0005661145551130176,
      "clip_ratio/low_mean": 0.00035024219062052,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009163567383438931,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3104.0,
      "completions/mean_length": 566.5301513671875,
      "completions/mean_terminated_length": 518.6187744140625,
      "completions/min_length": 64.0,
      "completions/min_terminated_length": 64.0,
      "epoch": 7.3826188393117524,
      "grad_norm": 0.1513671875,
      "learning_rate": 1e-06,
      "loss": 0.0113,
      "num_tokens": 459428436.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.2574998438358307,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 790
    },
    {
      "clip_ratio/high_max": 0.0016188510389838484,
      "clip_ratio/high_mean": 0.00048283214073308045,
      "clip_ratio/low_mean": 0.000309131930634976,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007919640747786616,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3700.0,
      "completions/mean_length": 582.4330444335938,
      "completions/mean_terminated_length": 538.7615966796875,
      "completions/min_length": 72.0,
      "completions/min_terminated_length": 72.0,
      "epoch": 7.391951006124234,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0229,
      "num_tokens": 459995816.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.19685347378253937,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 791
    },
    {
      "clip_ratio/high_max": 0.0018546927003626479,
      "clip_ratio/high_mean": 0.0005647272500937106,
      "clip_ratio/low_mean": 0.00031266814812624943,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008773953923082445,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2424.0,
      "completions/mean_length": 590.0123291015625,
      "completions/mean_terminated_length": 514.0558471679688,
      "completions/min_length": 36.0,
      "completions/min_terminated_length": 36.0,
      "epoch": 7.401283172936716,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": -0.0044,
      "num_tokens": 460544315.0,
      "reward": 0.5546875,
      "reward_std": 0.21533779799938202,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 792
    },
    {
      "clip_ratio/high_max": 0.0016667567588228849,
      "clip_ratio/high_mean": 0.00046237490892053756,
      "clip_ratio/low_mean": 0.00022091619075581548,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000683291098312111,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3378.0,
      "completions/mean_length": 568.8092041015625,
      "completions/mean_terminated_length": 524.9683837890625,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 7.410615339749198,
      "grad_norm": 0.115234375,
      "learning_rate": 1e-06,
      "loss": -0.0057,
      "num_tokens": 461097960.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.17577485740184784,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.486612468957901,
      "step": 793
    },
    {
      "clip_ratio/high_max": 0.0015939234272082103,
      "clip_ratio/high_mean": 0.00048763796485218336,
      "clip_ratio/low_mean": 0.00032636951834774663,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008140074769471539,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2657.0,
      "completions/mean_length": 596.1830444335938,
      "completions/mean_terminated_length": 564.6531372070312,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 7.41994750656168,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": 0.0062,
      "num_tokens": 461685220.0,
      "reward": 0.551339328289032,
      "reward_std": 0.22067400813102722,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 794
    },
    {
      "clip_ratio/high_max": 0.00141688451913069,
      "clip_ratio/high_mean": 0.00044076187373320863,
      "clip_ratio/low_mean": 0.0002488006248313468,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006895625019751606,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3203.0,
      "completions/mean_length": 570.896240234375,
      "completions/mean_terminated_length": 527.0813598632812,
      "completions/min_length": 69.0,
      "completions/min_terminated_length": 69.0,
      "epoch": 7.429279673374162,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0035,
      "num_tokens": 462231959.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.18536268174648285,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 795
    },
    {
      "clip_ratio/high_max": 0.00145923654690705,
      "clip_ratio/high_mean": 0.00043217743564127886,
      "clip_ratio/low_mean": 0.0002877214802765593,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007198989078460727,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2720.0,
      "completions/mean_length": 676.1473388671875,
      "completions/mean_terminated_length": 637.5485229492188,
      "completions/min_length": 88.0,
      "completions/min_terminated_length": 88.0,
      "epoch": 7.438611840186644,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0099,
      "num_tokens": 462896443.0,
      "reward": 0.4687500298023224,
      "reward_std": 0.22924308478832245,
      "rewards/verify_math_reward/mean": 0.46875,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 796
    },
    {
      "clip_ratio/high_max": 0.001531427750705916,
      "clip_ratio/high_mean": 0.0004305337644154861,
      "clip_ratio/low_mean": 0.00045910697940598766,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008896407489373814,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3460.0,
      "completions/mean_length": 614.8995971679688,
      "completions/mean_terminated_length": 575.6094970703125,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 7.447944006999125,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": -0.0103,
      "num_tokens": 463493369.0,
      "reward": 0.4877232313156128,
      "reward_std": 0.23649832606315613,
      "rewards/verify_math_reward/mean": 0.4877232015132904,
      "rewards/verify_math_reward/std": 0.500128448009491,
      "step": 797
    },
    {
      "clip_ratio/high_max": 0.0018859916326618986,
      "clip_ratio/high_mean": 0.0006090074216444918,
      "clip_ratio/low_mean": 0.0004206171322493901,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010296245682184235,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3956.0,
      "completions/mean_length": 614.5636596679688,
      "completions/mean_terminated_length": 579.2389526367188,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 7.457276173811607,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0119,
      "num_tokens": 464103322.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.22413356602191925,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 798
    },
    {
      "clip_ratio/high_max": 0.0016947435833571944,
      "clip_ratio/high_mean": 0.000559796584070682,
      "clip_ratio/low_mean": 0.0004089853653113096,
      "clip_ratio/low_min": 1.6587049685767852e-05,
      "clip_ratio/region_mean": 0.0009687819501777994,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3366.0,
      "completions/mean_length": 662.6864013671875,
      "completions/mean_terminated_length": 572.2325439453125,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 7.466608340624089,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0026,
      "num_tokens": 464694537.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.24243409931659698,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761127948761,
      "step": 799
    },
    {
      "clip_ratio/high_max": 0.0014676327882625628,
      "clip_ratio/high_mean": 0.000505304971966325,
      "clip_ratio/low_mean": 0.0003050643726965063,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008103693355678843,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3820.0,
      "completions/mean_length": 621.1998291015625,
      "completions/mean_terminated_length": 581.9808349609375,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 7.475940507436571,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0123,
      "num_tokens": 465301404.0,
      "reward": 0.590401828289032,
      "reward_std": 0.22229206562042236,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 800
    },
    {
      "clip_ratio/high_max": 0.0016326555942214327,
      "clip_ratio/high_mean": 0.0005000448104510724,
      "clip_ratio/low_mean": 0.00037784027892939775,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00087788510700193,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2789.0,
      "completions/mean_length": 647.2846069335938,
      "completions/mean_terminated_length": 580.5858764648438,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 7.485272674249052,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0064,
      "num_tokens": 465894539.0,
      "reward": 0.5167410969734192,
      "reward_std": 0.22300560772418976,
      "rewards/verify_math_reward/mean": 0.5167410969734192,
      "rewards/verify_math_reward/std": 0.4999987483024597,
      "step": 801
    },
    {
      "clip_ratio/high_max": 0.0018936303849841352,
      "clip_ratio/high_mean": 0.000623231191184459,
      "clip_ratio/low_mean": 0.0004154656953687663,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001038696858813637,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2776.0,
      "completions/mean_length": 632.6049194335938,
      "completions/mean_terminated_length": 569.6340942382812,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 7.494604841061534,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": -0.0034,
      "num_tokens": 466480697.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.2536267638206482,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 802
    },
    {
      "clip_ratio/high_max": 0.001562667417601915,
      "clip_ratio/high_mean": 0.000518414274438328,
      "clip_ratio/low_mean": 0.0003117373681789104,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000830151646368904,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4013.0,
      "completions/mean_length": 645.4252319335938,
      "completions/mean_terminated_length": 578.6905517578125,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 7.503937007874016,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": -0.0021,
      "num_tokens": 467084038.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.22064054012298584,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973994791507721,
      "step": 803
    },
    {
      "clip_ratio/high_max": 0.0016991870797937736,
      "clip_ratio/high_mean": 0.0005164495009921666,
      "clip_ratio/low_mean": 0.0003523468687944842,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008687963800184662,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1982.0,
      "completions/mean_length": 600.036865234375,
      "completions/mean_terminated_length": 552.580322265625,
      "completions/min_length": 27.0,
      "completions/min_terminated_length": 27.0,
      "epoch": 7.5132691746864975,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 467664359.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.24723871052265167,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973995089530945,
      "step": 804
    },
    {
      "clip_ratio/high_max": 0.0017386214040016057,
      "clip_ratio/high_mean": 0.0005429102311609313,
      "clip_ratio/low_mean": 0.00034907324879895896,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008919834854168585,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2433.0,
      "completions/mean_length": 602.3359375,
      "completions/mean_terminated_length": 542.8524780273438,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 7.522601341498979,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": 0.0066,
      "num_tokens": 468227764.0,
      "reward": 0.5457589626312256,
      "reward_std": 0.23154176771640778,
      "rewards/verify_math_reward/mean": 0.5457589030265808,
      "rewards/verify_math_reward/std": 0.4981798231601715,
      "step": 805
    },
    {
      "clip_ratio/high_max": 0.0014294162710939418,
      "clip_ratio/high_mean": 0.0004229120048648838,
      "clip_ratio/low_mean": 0.00026226252475680667,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006851745201856829,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3511.0,
      "completions/mean_length": 631.4609375,
      "completions/mean_terminated_length": 572.4733276367188,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 7.531933508311461,
      "grad_norm": 0.1083984375,
      "learning_rate": 1e-06,
      "loss": -0.0019,
      "num_tokens": 468811401.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.16905026137828827,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 806
    },
    {
      "clip_ratio/high_max": 0.001720678735182446,
      "clip_ratio/high_mean": 0.00045973018529821275,
      "clip_ratio/low_mean": 0.00036290575133079983,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008226359382206283,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3888.0,
      "completions/mean_length": 632.536865234375,
      "completions/mean_terminated_length": 581.5458374023438,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 7.541265675123943,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0033,
      "num_tokens": 469412546.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.19261160492897034,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.4999600946903229,
      "step": 807
    },
    {
      "clip_ratio/high_max": 0.0014549345269188052,
      "clip_ratio/high_mean": 0.00045358947397744487,
      "clip_ratio/low_mean": 0.00032638281209074194,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007799722761774319,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2353.0,
      "completions/mean_length": 633.5045166015625,
      "completions/mean_terminated_length": 598.3720092773438,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 7.550597841936424,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0037,
      "num_tokens": 470022694.0,
      "reward": 0.4977678656578064,
      "reward_std": 0.20793946087360382,
      "rewards/verify_math_reward/mean": 0.4977678656578064,
      "rewards/verify_math_reward/std": 0.5002743005752563,
      "step": 808
    },
    {
      "clip_ratio/high_max": 0.001693226299721573,
      "clip_ratio/high_mean": 0.0005225888533004763,
      "clip_ratio/low_mean": 0.00032883471340028336,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008514235723851016,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3600.0,
      "completions/mean_length": 638.3136596679688,
      "completions/mean_terminated_length": 595.3367309570312,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 7.559930008748906,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0062,
      "num_tokens": 470637391.0,
      "reward": 0.551339328289032,
      "reward_std": 0.24461443722248077,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 809
    },
    {
      "clip_ratio/high_max": 0.0012140335056756157,
      "clip_ratio/high_mean": 0.00038195894353521,
      "clip_ratio/low_mean": 0.0002825527674303885,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006645117059633776,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3615.0,
      "completions/mean_length": 570.375,
      "completions/mean_terminated_length": 542.6141967773438,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 7.569262175561388,
      "grad_norm": 0.11767578125,
      "learning_rate": 1e-06,
      "loss": -0.002,
      "num_tokens": 471221815.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.18388956785202026,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796950817108154,
      "step": 810
    },
    {
      "clip_ratio/high_max": 0.0014798217916904832,
      "clip_ratio/high_mean": 0.00041883209007664846,
      "clip_ratio/low_mean": 0.00032986845434379575,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007487005432267324,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3755.0,
      "completions/mean_length": 648.0011596679688,
      "completions/mean_terminated_length": 609.0846557617188,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 7.57859434237387,
      "grad_norm": 0.11767578125,
      "learning_rate": 1e-06,
      "loss": 0.0162,
      "num_tokens": 471847016.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.18878155946731567,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 811
    },
    {
      "clip_ratio/high_max": 0.001788241143913183,
      "clip_ratio/high_mean": 0.0005103839109779074,
      "clip_ratio/low_mean": 0.000431636027769855,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009420199339729152,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3774.0,
      "completions/mean_length": 655.0546875,
      "completions/mean_terminated_length": 604.3952026367188,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 7.587926509186351,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0044,
      "num_tokens": 472473673.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.23912444710731506,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 812
    },
    {
      "clip_ratio/high_max": 0.0016826707324071322,
      "clip_ratio/high_mean": 0.0004826609840620222,
      "clip_ratio/low_mean": 0.00019433401951118867,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006769950123270974,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1799.0,
      "completions/mean_length": 616.1395263671875,
      "completions/mean_terminated_length": 540.7491455078125,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 7.597258675998834,
      "grad_norm": 0.1103515625,
      "learning_rate": 1e-06,
      "loss": -0.013,
      "num_tokens": 473035366.0,
      "reward": 0.606026828289032,
      "reward_std": 0.16668446362018585,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 813
    },
    {
      "clip_ratio/high_max": 0.0020013900084450142,
      "clip_ratio/high_mean": 0.0005548718265799835,
      "clip_ratio/low_mean": 0.0003268764485255815,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008817482830636436,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3075.0,
      "completions/mean_length": 540.7042846679688,
      "completions/mean_terminated_length": 496.5141296386719,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 7.606590842811316,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.002,
      "num_tokens": 473563957.0,
      "reward": 0.640625,
      "reward_std": 0.21522751450538635,
      "rewards/verify_math_reward/mean": 0.640625,
      "rewards/verify_math_reward/std": 0.48008525371551514,
      "step": 814
    },
    {
      "clip_ratio/high_max": 0.0015992808030205197,
      "clip_ratio/high_mean": 0.00044640013334174,
      "clip_ratio/low_mean": 0.0003525676007711809,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007989677169462084,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2608.0,
      "completions/mean_length": 610.911865234375,
      "completions/mean_terminated_length": 547.5465698242188,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 7.615923009623797,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0103,
      "num_tokens": 474131334.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.1994117796421051,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 815
    },
    {
      "clip_ratio/high_max": 0.0015622702030668734,
      "clip_ratio/high_mean": 0.00044291848939792544,
      "clip_ratio/low_mean": 0.000254774897371135,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006976933846090105,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3201.0,
      "completions/mean_length": 621.5167846679688,
      "completions/mean_terminated_length": 582.3013916015625,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 7.625255176436279,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.0045,
      "num_tokens": 474733405.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.20099589228630066,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 816
    },
    {
      "clip_ratio/high_max": 0.0018664657654881012,
      "clip_ratio/high_mean": 0.0005914838224043706,
      "clip_ratio/low_mean": 0.00024880816079075885,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008402919856962399,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2398.0,
      "completions/mean_length": 609.3984375,
      "completions/mean_terminated_length": 546.0056762695312,
      "completions/min_length": 13.0,
      "completions/min_terminated_length": 13.0,
      "epoch": 7.634587343248761,
      "grad_norm": 0.10986328125,
      "learning_rate": 1e-06,
      "loss": -0.0069,
      "num_tokens": 475299522.0,
      "reward": 0.559151828289032,
      "reward_std": 0.15770578384399414,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 817
    },
    {
      "clip_ratio/high_max": 0.001357779909085366,
      "clip_ratio/high_mean": 0.0004008281947562864,
      "clip_ratio/low_mean": 0.0003323059210060819,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007331341175813577,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3910.0,
      "completions/mean_length": 664.1295166015625,
      "completions/mean_terminated_length": 577.74365234375,
      "completions/min_length": 29.0,
      "completions/min_terminated_length": 29.0,
      "epoch": 7.6439195100612425,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": -0.0133,
      "num_tokens": 475895886.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.19787125289440155,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4982811510562897,
      "step": 818
    },
    {
      "clip_ratio/high_max": 0.0013974533867440186,
      "clip_ratio/high_mean": 0.00043150204078301613,
      "clip_ratio/low_mean": 0.00034697368744218693,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007784757353874738,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3819.0,
      "completions/mean_length": 657.693115234375,
      "completions/mean_terminated_length": 599.152099609375,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 7.653251676873724,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0025,
      "num_tokens": 476521307.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.2233087122440338,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 819
    },
    {
      "clip_ratio/high_max": 0.001806898575523519,
      "clip_ratio/high_mean": 0.0006096012002672069,
      "clip_ratio/low_mean": 0.0002712697975084666,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008808709849290608,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4077.0,
      "completions/mean_length": 577.2109375,
      "completions/mean_terminated_length": 525.4053955078125,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "epoch": 7.662583843686206,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.0051,
      "num_tokens": 477067808.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.2060961276292801,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938119411468506,
      "step": 820
    },
    {
      "clip_ratio/high_max": 0.0016304007822327549,
      "clip_ratio/high_mean": 0.0005361508647183655,
      "clip_ratio/low_mean": 0.00040439952840642945,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009405503815287375,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3089.0,
      "completions/mean_length": 666.3080444335938,
      "completions/mean_terminated_length": 588.0045166015625,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 7.671916010498688,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0056,
      "num_tokens": 477661988.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.2503633499145508,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 821
    },
    {
      "clip_ratio/high_max": 0.0014184003975969972,
      "clip_ratio/high_mean": 0.000398283656068088,
      "clip_ratio/low_mean": 0.0003752951065507659,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007735787530691596,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3017.0,
      "completions/mean_length": 643.6886596679688,
      "completions/mean_terminated_length": 564.8687133789062,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 7.681248177311169,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0041,
      "num_tokens": 478253109.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.20601874589920044,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 822
    },
    {
      "clip_ratio/high_max": 0.001694536106697342,
      "clip_ratio/high_mean": 0.00048725585725151177,
      "clip_ratio/low_mean": 0.0002923149418165849,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007795707961122389,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3446.0,
      "completions/mean_length": 614.138427734375,
      "completions/mean_terminated_length": 526.4942626953125,
      "completions/min_length": 64.0,
      "completions/min_terminated_length": 64.0,
      "epoch": 7.690580344123651,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0095,
      "num_tokens": 478799377.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.21804973483085632,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 823
    },
    {
      "clip_ratio/high_max": 0.001586648628290277,
      "clip_ratio/high_mean": 0.00044741310102835996,
      "clip_ratio/low_mean": 0.00035425917997145007,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008016722656520869,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4038.0,
      "completions/mean_length": 575.5949096679688,
      "completions/mean_terminated_length": 551.86181640625,
      "completions/min_length": 76.0,
      "completions/min_terminated_length": 76.0,
      "epoch": 7.699912510936133,
      "grad_norm": 0.11865234375,
      "learning_rate": 1e-06,
      "loss": 0.0117,
      "num_tokens": 479377478.0,
      "reward": 0.512276828289032,
      "reward_std": 0.193587988615036,
      "rewards/verify_math_reward/mean": 0.5122767686843872,
      "rewards/verify_math_reward/std": 0.500128448009491,
      "step": 824
    },
    {
      "clip_ratio/high_max": 0.0016389518677897286,
      "clip_ratio/high_mean": 0.0005119308898429153,
      "clip_ratio/low_mean": 0.0004532880793703953,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009652189673943212,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3913.0,
      "completions/mean_length": 615.474365234375,
      "completions/mean_terminated_length": 568.2274169921875,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 7.7092446777486145,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": -0.0117,
      "num_tokens": 479974839.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.23300601541996002,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 825
    },
    {
      "clip_ratio/high_max": 0.0013268217981021735,
      "clip_ratio/high_mean": 0.0003788411020195781,
      "clip_ratio/low_mean": 0.00033185177494488016,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007106928760549636,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3009.0,
      "completions/mean_length": 636.4252319335938,
      "completions/mean_terminated_length": 557.439453125,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 7.718576844561096,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": 0.0068,
      "num_tokens": 480557268.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.18532174825668335,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 826
    },
    {
      "clip_ratio/high_max": 0.0016341294431185815,
      "clip_ratio/high_mean": 0.0005368804891077161,
      "clip_ratio/low_mean": 0.000411711231663503,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009485917289566714,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3866.0,
      "completions/mean_length": 583.5335083007812,
      "completions/mean_terminated_length": 551.8896484375,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 7.727909011373578,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0001,
      "num_tokens": 481142946.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.2356332242488861,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 827
    },
    {
      "clip_ratio/high_max": 0.0020589468367688823,
      "clip_ratio/high_mean": 0.00061184364494693,
      "clip_ratio/low_mean": 0.000335012699224535,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009468563348491443,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2809.0,
      "completions/mean_length": 639.5982666015625,
      "completions/mean_terminated_length": 572.7508544921875,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 7.73724117818606,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0012,
      "num_tokens": 481739138.0,
      "reward": 0.4899553656578064,
      "reward_std": 0.21974080801010132,
      "rewards/verify_math_reward/mean": 0.4899553656578064,
      "rewards/verify_math_reward/std": 0.5001782774925232,
      "step": 828
    },
    {
      "clip_ratio/high_max": 0.0014042636730664526,
      "clip_ratio/high_mean": 0.00040027656075380946,
      "clip_ratio/low_mean": 0.00028463592821026396,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006849125002190704,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3789.0,
      "completions/mean_length": 602.0145263671875,
      "completions/mean_terminated_length": 526.318115234375,
      "completions/min_length": 85.0,
      "completions/min_terminated_length": 85.0,
      "epoch": 7.746573344998541,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0031,
      "num_tokens": 482290863.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.19888931512832642,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 829
    },
    {
      "clip_ratio/high_max": 0.0015067167068991694,
      "clip_ratio/high_mean": 0.00046176001535513933,
      "clip_ratio/low_mean": 0.0002626921981345731,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007244522140581466,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3329.0,
      "completions/mean_length": 617.8515625,
      "completions/mean_terminated_length": 558.6322631835938,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 7.755905511811024,
      "grad_norm": 0.11767578125,
      "learning_rate": 1e-06,
      "loss": -0.0069,
      "num_tokens": 482875834.0,
      "reward": 0.5256696939468384,
      "reward_std": 0.21147316694259644,
      "rewards/verify_math_reward/mean": 0.5256696343421936,
      "rewards/verify_math_reward/std": 0.4996195435523987,
      "step": 830
    },
    {
      "clip_ratio/high_max": 0.0017874856630442082,
      "clip_ratio/high_mean": 0.0005050203840255563,
      "clip_ratio/low_mean": 0.000261511263374814,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007665316479688045,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3012.0,
      "completions/mean_length": 628.4542846679688,
      "completions/mean_terminated_length": 573.413818359375,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 7.765237678623506,
      "grad_norm": 0.10693359375,
      "learning_rate": 1e-06,
      "loss": 0.0001,
      "num_tokens": 483468321.0,
      "reward": 0.551339328289032,
      "reward_std": 0.19426269829273224,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 831
    },
    {
      "clip_ratio/high_max": 0.0015487432574445847,
      "clip_ratio/high_mean": 0.0004490351029744488,
      "clip_ratio/low_mean": 0.00037584222036457504,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008248773301602341,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3966.0,
      "completions/mean_length": 663.177490234375,
      "completions/mean_terminated_length": 596.7860717773438,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 7.7745698454359875,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0148,
      "num_tokens": 484074440.0,
      "reward": 0.5022321939468384,
      "reward_std": 0.18464843928813934,
      "rewards/verify_math_reward/mean": 0.5022321343421936,
      "rewards/verify_math_reward/std": 0.5002743005752563,
      "step": 832
    },
    {
      "clip_ratio/high_max": 0.0013228818834249978,
      "clip_ratio/high_mean": 0.00035903547359339427,
      "clip_ratio/low_mean": 0.00034873718891503813,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007077726659190375,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3382.0,
      "completions/mean_length": 580.5714721679688,
      "completions/mean_terminated_length": 536.8768310546875,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 7.783902012248469,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0042,
      "num_tokens": 484637768.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.2009527087211609,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 833
    },
    {
      "clip_ratio/high_max": 0.0014881354891258525,
      "clip_ratio/high_mean": 0.0003941183897495648,
      "clip_ratio/low_mean": 0.00033160380439767323,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007257221918735013,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2027.0,
      "completions/mean_length": 533.6517944335938,
      "completions/mean_terminated_length": 517.6771850585938,
      "completions/min_length": 87.0,
      "completions/min_terminated_length": 87.0,
      "epoch": 7.793234179060951,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.0113,
      "num_tokens": 485196632.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.20316511392593384,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.48841196298599243,
      "step": 834
    },
    {
      "clip_ratio/high_max": 0.0013740085478275432,
      "clip_ratio/high_mean": 0.0003876993386029426,
      "clip_ratio/low_mean": 0.00024111458844799927,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006288139406933624,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3709.0,
      "completions/mean_length": 644.1317138671875,
      "completions/mean_terminated_length": 581.3704223632812,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 7.802566345873433,
      "grad_norm": 0.11328125,
      "learning_rate": 1e-06,
      "loss": 0.0136,
      "num_tokens": 485800878.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.17799797654151917,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 835
    },
    {
      "clip_ratio/high_max": 0.0015182806509983493,
      "clip_ratio/high_mean": 0.0005289736777740472,
      "clip_ratio/low_mean": 0.0003237526958628223,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008527263762516668,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2466.0,
      "completions/mean_length": 587.7824096679688,
      "completions/mean_terminated_length": 536.1325073242188,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 7.811898512685914,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.003,
      "num_tokens": 486374955.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.20464006066322327,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 836
    },
    {
      "clip_ratio/high_max": 0.0011973083001066698,
      "clip_ratio/high_mean": 0.00036646658156769263,
      "clip_ratio/low_mean": 0.00034716221784947265,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007136288108995359,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3990.0,
      "completions/mean_length": 657.4777221679688,
      "completions/mean_terminated_length": 590.97607421875,
      "completions/min_length": 86.0,
      "completions/min_terminated_length": 86.0,
      "epoch": 7.821230679498396,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": 0.0187,
      "num_tokens": 486980327.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.19186343252658844,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 837
    },
    {
      "clip_ratio/high_max": 0.0018675804612939828,
      "clip_ratio/high_mean": 0.0005910816469167912,
      "clip_ratio/low_mean": 0.00022987412319253053,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008209557640839193,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3371.0,
      "completions/mean_length": 589.6373291015625,
      "completions/mean_terminated_length": 538.0147094726562,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 7.830562846310878,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0145,
      "num_tokens": 487542538.0,
      "reward": 0.582589328289032,
      "reward_std": 0.19617946445941925,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.4934072494506836,
      "step": 838
    },
    {
      "clip_ratio/high_max": 0.0017806277119234437,
      "clip_ratio/high_mean": 0.0005850000522968912,
      "clip_ratio/low_mean": 0.00038751102852074837,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009725110694489558,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3113.0,
      "completions/mean_length": 554.8515625,
      "completions/mean_terminated_length": 522.9493408203125,
      "completions/min_length": 85.0,
      "completions/min_terminated_length": 85.0,
      "epoch": 7.83989501312336,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0089,
      "num_tokens": 488102197.0,
      "reward": 0.5703125,
      "reward_std": 0.2285723090171814,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 839
    },
    {
      "clip_ratio/high_max": 0.001589763724950899,
      "clip_ratio/high_mean": 0.0004951285475272016,
      "clip_ratio/low_mean": 0.0003442013744461292,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008393299222007045,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2171.0,
      "completions/mean_length": 558.1283569335938,
      "completions/mean_terminated_length": 542.2634887695312,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 7.849227179935841,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": -0.0015,
      "num_tokens": 488670776.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.20023521780967712,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 840
    },
    {
      "clip_ratio/high_max": 0.0018499267971492372,
      "clip_ratio/high_mean": 0.0005242909760454495,
      "clip_ratio/low_mean": 0.0003799465844167571,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009042375577337225,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2832.0,
      "completions/mean_length": 571.3114013671875,
      "completions/mean_terminated_length": 511.2996826171875,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "epoch": 7.858559346748323,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": 0.0114,
      "num_tokens": 489218631.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.2321811467409134,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 841
    },
    {
      "clip_ratio/high_max": 0.0016465964463350247,
      "clip_ratio/high_mean": 0.0005022620300678682,
      "clip_ratio/low_mean": 0.00042260734466026406,
      "clip_ratio/low_min": 1.0539629329286981e-05,
      "clip_ratio/region_mean": 0.0009248693768313387,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2819.0,
      "completions/mean_length": 600.6574096679688,
      "completions/mean_terminated_length": 584.9832153320312,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 7.867891513560805,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0142,
      "num_tokens": 489825004.0,
      "reward": 0.512276828289032,
      "reward_std": 0.22286362946033478,
      "rewards/verify_math_reward/mean": 0.5122767686843872,
      "rewards/verify_math_reward/std": 0.500128448009491,
      "step": 842
    },
    {
      "clip_ratio/high_max": 0.0017361270383844385,
      "clip_ratio/high_mean": 0.000524047238741332,
      "clip_ratio/low_mean": 0.0003687840110160323,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008928312572606956,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3633.0,
      "completions/mean_length": 535.2902221679688,
      "completions/mean_terminated_length": 515.3086547851562,
      "completions/min_length": 78.0,
      "completions/min_terminated_length": 78.0,
      "epoch": 7.8772236803732865,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0075,
      "num_tokens": 490373312.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.21714681386947632,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 843
    },
    {
      "clip_ratio/high_max": 0.0015636389744031476,
      "clip_ratio/high_mean": 0.00047459722964049433,
      "clip_ratio/low_mean": 0.0003665347717287659,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008411319840888609,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2477.0,
      "completions/mean_length": 596.6752319335938,
      "completions/mean_terminated_length": 524.9351196289062,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 7.886555847185768,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.008,
      "num_tokens": 490917357.0,
      "reward": 0.609375,
      "reward_std": 0.19576901197433472,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 844
    },
    {
      "clip_ratio/high_max": 0.0017464211996411905,
      "clip_ratio/high_mean": 0.0005997047271648626,
      "clip_ratio/low_mean": 0.0003516608542213362,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009513655941191246,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3439.0,
      "completions/mean_length": 590.2154541015625,
      "completions/mean_terminated_length": 550.646728515625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 7.89588801399825,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0099,
      "num_tokens": 491502582.0,
      "reward": 0.551339328289032,
      "reward_std": 0.22849632799625397,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 845
    },
    {
      "clip_ratio/high_max": 0.0014702572334499564,
      "clip_ratio/high_mean": 0.0004401191754368483,
      "clip_ratio/low_mean": 0.0003532125886067661,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007933317669994722,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2273.0,
      "completions/mean_length": 566.5636596679688,
      "completions/mean_terminated_length": 538.7728271484375,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 7.905220180810732,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0026,
      "num_tokens": 492078215.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.2274732142686844,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 846
    },
    {
      "clip_ratio/high_max": 0.001387545023135317,
      "clip_ratio/high_mean": 0.00039264502515834465,
      "clip_ratio/low_mean": 0.00034647212044092157,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007391171459403267,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2247.0,
      "completions/mean_length": 589.3326416015625,
      "completions/mean_terminated_length": 553.751953125,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 7.914552347623214,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0001,
      "num_tokens": 492670225.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.19756846129894257,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915629625320435,
      "step": 847
    },
    {
      "clip_ratio/high_max": 0.0015947225383570185,
      "clip_ratio/high_mean": 0.0005433679509678768,
      "clip_ratio/low_mean": 0.00039698560999568144,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009403535850651679,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4054.0,
      "completions/mean_length": 631.8538208007812,
      "completions/mean_terminated_length": 556.8038330078125,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 7.923884514435695,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": -0.0014,
      "num_tokens": 493250070.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.221356600522995,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 848
    },
    {
      "clip_ratio/high_max": 0.0018653158276720205,
      "clip_ratio/high_mean": 0.0006010659449202649,
      "clip_ratio/low_mean": 0.00031246157220721216,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009135275104199536,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3219.0,
      "completions/mean_length": 611.099365234375,
      "completions/mean_terminated_length": 559.792724609375,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 7.933216681248178,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0077,
      "num_tokens": 493835599.0,
      "reward": 0.546875,
      "reward_std": 0.2418368011713028,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 849
    },
    {
      "clip_ratio/high_max": 0.0014806769668211928,
      "clip_ratio/high_mean": 0.0003980743955480648,
      "clip_ratio/low_mean": 0.00033593845381574283,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007340128595387796,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3864.0,
      "completions/mean_length": 664.3660888671875,
      "completions/mean_terminated_length": 590.0205078125,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 7.942548848060659,
      "grad_norm": 0.1171875,
      "learning_rate": 1e-06,
      "loss": 0.004,
      "num_tokens": 494447823.0,
      "reward": 0.520089328289032,
      "reward_std": 0.1957676112651825,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 850
    },
    {
      "clip_ratio/high_max": 0.0016430820141977165,
      "clip_ratio/high_mean": 0.0004774774679390248,
      "clip_ratio/low_mean": 0.0002885843025524082,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007660617720830487,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3250.0,
      "completions/mean_length": 590.1685791015625,
      "completions/mean_terminated_length": 518.2949829101562,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 7.951881014873141,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": -0.0007,
      "num_tokens": 494989038.0,
      "reward": 0.5145089626312256,
      "reward_std": 0.21782150864601135,
      "rewards/verify_math_reward/mean": 0.5145089030265808,
      "rewards/verify_math_reward/std": 0.5000685453414917,
      "step": 851
    },
    {
      "clip_ratio/high_max": 0.0016200119262066437,
      "clip_ratio/high_mean": 0.0004887748599458064,
      "clip_ratio/low_mean": 0.0003507403177991364,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008395151726290351,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3150.0,
      "completions/mean_length": 673.3984375,
      "completions/mean_terminated_length": 591.2559814453125,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 7.961213181685623,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.0079,
      "num_tokens": 495595203.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.20497384667396545,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 852
    },
    {
      "clip_ratio/high_max": 0.001667117568104004,
      "clip_ratio/high_mean": 0.0005183360390219605,
      "clip_ratio/low_mean": 0.0003972172396515816,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009155532734439475,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2391.0,
      "completions/mean_length": 601.5658569335938,
      "completions/mean_terminated_length": 558.1322021484375,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 7.970545348498105,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0043,
      "num_tokens": 496170022.0,
      "reward": 0.6015625,
      "reward_std": 0.23897428810596466,
      "rewards/verify_math_reward/mean": 0.6015625,
      "rewards/verify_math_reward/std": 0.48984986543655396,
      "step": 853
    },
    {
      "clip_ratio/high_max": 0.00172699805443699,
      "clip_ratio/high_mean": 0.0005003368455618329,
      "clip_ratio/low_mean": 0.00039065806743110443,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008909949046937982,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4086.0,
      "completions/mean_length": 600.7254638671875,
      "completions/mean_terminated_length": 557.2813720703125,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 7.979877515310586,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0175,
      "num_tokens": 496748664.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.22199669480323792,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161845445632935,
      "step": 854
    },
    {
      "clip_ratio/high_max": 0.0017066129184968304,
      "clip_ratio/high_mean": 0.0005071540376775374,
      "clip_ratio/low_mean": 0.00031170578449746245,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000818859824903484,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3714.0,
      "completions/mean_length": 618.8314819335938,
      "completions/mean_terminated_length": 571.630126953125,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 7.989209682123068,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 497355097.0,
      "reward": 0.5055803656578064,
      "reward_std": 0.22594577074050903,
      "rewards/verify_math_reward/mean": 0.5055803656578064,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 855
    },
    {
      "clip_ratio/high_max": 0.0014975002650317037,
      "clip_ratio/high_mean": 0.0004614462129666208,
      "clip_ratio/low_mean": 0.00026589575156776846,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007273419596458552,
      "completions/clipped_ratio": 0.0028409090909090606,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2522.0,
      "completions/mean_length": 651.2471923828125,
      "completions/mean_terminated_length": 641.4330444335938,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 7.99854184893555,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": -0.0062,
      "num_tokens": 497941726.0,
      "reward": 0.578125,
      "reward_std": 0.19820895791053772,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 856
    },
    {
      "clip_ratio/high_max": 0.0017468438973082812,
      "clip_ratio/high_mean": 0.00045737842401649687,
      "clip_ratio/low_mean": 0.00021875535117032996,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006761337695024849,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3038.0,
      "completions/mean_length": 632.7980346679688,
      "completions/mean_terminated_length": 573.8331909179688,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 8.009332166812483,
      "grad_norm": 0.11669921875,
      "learning_rate": 1e-06,
      "loss": -0.0082,
      "num_tokens": 498541689.0,
      "reward": 0.5390625,
      "reward_std": 0.17788033187389374,
      "rewards/verify_math_reward/mean": 0.5390625,
      "rewards/verify_math_reward/std": 0.4987502098083496,
      "step": 857
    },
    {
      "clip_ratio/high_max": 0.0016137808270286769,
      "clip_ratio/high_mean": 0.0005060916400907445,
      "clip_ratio/low_mean": 0.00031017795913612645,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008162696040017181,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2810.0,
      "completions/mean_length": 585.9140625,
      "completions/mean_terminated_length": 550.2987060546875,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 8.018664333624963,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0194,
      "num_tokens": 499125660.0,
      "reward": 0.6127232313156128,
      "reward_std": 0.2267257571220398,
      "rewards/verify_math_reward/mean": 0.6127232313156128,
      "rewards/verify_math_reward/std": 0.4873998463153839,
      "step": 858
    },
    {
      "clip_ratio/high_max": 0.0014067975353100337,
      "clip_ratio/high_mean": 0.0004142784707710234,
      "clip_ratio/low_mean": 0.000287919314928331,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007021977953627356,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3589.0,
      "completions/mean_length": 677.8326416015625,
      "completions/mean_terminated_length": 579.72216796875,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 8.027996500437446,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0054,
      "num_tokens": 499715190.0,
      "reward": 0.515625,
      "reward_std": 0.20098520815372467,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 859
    },
    {
      "clip_ratio/high_max": 0.0015272035479938495,
      "clip_ratio/high_mean": 0.0004498293205870141,
      "clip_ratio/low_mean": 0.0002851049209766643,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000734934239744689,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4089.0,
      "completions/mean_length": 686.5045166015625,
      "completions/mean_terminated_length": 600.681884765625,
      "completions/min_length": 66.0,
      "completions/min_terminated_length": 66.0,
      "epoch": 8.037328667249927,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.004,
      "num_tokens": 500320754.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.20298008620738983,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 860
    },
    {
      "clip_ratio/high_max": 0.0017860611260402948,
      "clip_ratio/high_mean": 0.0004994106366211781,
      "clip_ratio/low_mean": 0.0002945915612144745,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007940021796457586,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3768.0,
      "completions/mean_length": 623.6484375,
      "completions/mean_terminated_length": 556.4926147460938,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 8.04666083406241,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0032,
      "num_tokens": 500894959.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.18242643773555756,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161848425865173,
      "step": 861
    },
    {
      "clip_ratio/high_max": 0.0016310338151015458,
      "clip_ratio/high_mean": 0.000524143026495949,
      "clip_ratio/low_mean": 0.0003509167015636194,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008750597244215896,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2303.0,
      "completions/mean_length": 605.2600708007812,
      "completions/mean_terminated_length": 549.8515014648438,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 8.05599300087489,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0087,
      "num_tokens": 501461360.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.2188713699579239,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 862
    },
    {
      "clip_ratio/high_max": 0.001550317799228651,
      "clip_ratio/high_mean": 0.00044595811823455733,
      "clip_ratio/low_mean": 0.0003373031987621289,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007832613182472414,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3992.0,
      "completions/mean_length": 603.9129638671875,
      "completions/mean_terminated_length": 576.4161987304688,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 8.065325167687373,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0028,
      "num_tokens": 502078130.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.19813409447669983,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 863
    },
    {
      "clip_ratio/high_max": 0.0015432717054864042,
      "clip_ratio/high_mean": 0.0004493577710036334,
      "clip_ratio/low_mean": 0.00025994005727625336,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007092978230502922,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2681.0,
      "completions/mean_length": 638.0011596679688,
      "completions/mean_terminated_length": 575.12841796875,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 8.074657334499854,
      "grad_norm": 0.109375,
      "learning_rate": 1e-06,
      "loss": 0.0019,
      "num_tokens": 502670131.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.1755484640598297,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4982811510562897,
      "step": 864
    },
    {
      "clip_ratio/high_max": 0.0013704040520678973,
      "clip_ratio/high_mean": 0.0003949880742766254,
      "clip_ratio/low_mean": 0.00026574883054308884,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006607368941331515,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2025.0,
      "completions/mean_length": 585.3627319335938,
      "completions/mean_terminated_length": 549.7418212890625,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 8.083989501312336,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.0025,
      "num_tokens": 503255176.0,
      "reward": 0.535714328289032,
      "reward_std": 0.19892504811286926,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 865
    },
    {
      "clip_ratio/high_max": 0.001541744650239707,
      "clip_ratio/high_mean": 0.0004173470326804818,
      "clip_ratio/low_mean": 0.0003949804329295148,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008123274710669648,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3504.0,
      "completions/mean_length": 554.9542846679688,
      "completions/mean_terminated_length": 519.0247802734375,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 8.093321668124817,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0058,
      "num_tokens": 503801191.0,
      "reward": 0.5546875,
      "reward_std": 0.2021905481815338,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 866
    },
    {
      "clip_ratio/high_max": 0.0014549273764714599,
      "clip_ratio/high_mean": 0.0004922729364125189,
      "clip_ratio/low_mean": 0.0003672184179777105,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008594913520028058,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3110.0,
      "completions/mean_length": 664.5748291015625,
      "completions/mean_terminated_length": 586.231689453125,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 8.1026538349373,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": -0.005,
      "num_tokens": 504395434.0,
      "reward": 0.515625,
      "reward_std": 0.2237556129693985,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 867
    },
    {
      "clip_ratio/high_max": 0.001549756049826101,
      "clip_ratio/high_mean": 0.0004758235231747676,
      "clip_ratio/low_mean": 0.00033826272351689113,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008140862391883275,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3846.0,
      "completions/mean_length": 652.9654541015625,
      "completions/mean_terminated_length": 574.3572998046875,
      "completions/min_length": 79.0,
      "completions/min_terminated_length": 79.0,
      "epoch": 8.11198600174978,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0048,
      "num_tokens": 504996371.0,
      "reward": 0.546875,
      "reward_std": 0.22138871252536774,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 868
    },
    {
      "clip_ratio/high_max": 0.0018592611049825791,
      "clip_ratio/high_mean": 0.0006670099746770575,
      "clip_ratio/low_mean": 0.0003113480520369194,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009783580208022613,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4070.0,
      "completions/mean_length": 620.5167846679688,
      "completions/mean_terminated_length": 573.3382568359375,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 8.121318168562263,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.01,
      "num_tokens": 505585362.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.22462351620197296,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 869
    },
    {
      "clip_ratio/high_max": 0.001431892036634963,
      "clip_ratio/high_mean": 0.00047108674198170775,
      "clip_ratio/low_mean": 0.0002798161339114813,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007509028839649545,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3136.0,
      "completions/mean_length": 600.5971069335938,
      "completions/mean_terminated_length": 565.1307373046875,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 8.130650335374744,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": -0.0015,
      "num_tokens": 506172017.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.1953885406255722,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 870
    },
    {
      "clip_ratio/high_max": 0.001366339684864215,
      "clip_ratio/high_mean": 0.00037103207421296247,
      "clip_ratio/low_mean": 0.00021886720855945896,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0005898992758375243,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3893.0,
      "completions/mean_length": 616.9107666015625,
      "completions/mean_terminated_length": 561.6870727539062,
      "completions/min_length": 58.0,
      "completions/min_terminated_length": 58.0,
      "epoch": 8.139982502187227,
      "grad_norm": 0.1103515625,
      "learning_rate": 1e-06,
      "loss": 0.0024,
      "num_tokens": 506762737.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.14530527591705322,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 871
    },
    {
      "clip_ratio/high_max": 0.001973413000087021,
      "clip_ratio/high_mean": 0.000655198623007891,
      "clip_ratio/low_mean": 0.00040153223619654455,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010567308781901374,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2826.0,
      "completions/mean_length": 610.005615234375,
      "completions/mean_terminated_length": 550.6527099609375,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 8.149314668999708,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": -0.0009,
      "num_tokens": 507344262.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.2408924549818039,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 872
    },
    {
      "clip_ratio/high_max": 0.001684861388639547,
      "clip_ratio/high_mean": 0.0004855446362626026,
      "clip_ratio/low_mean": 0.00039934766266469524,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000884892295289319,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2789.0,
      "completions/mean_length": 620.5480346679688,
      "completions/mean_terminated_length": 545.2531127929688,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 8.15864683581219,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0095,
      "num_tokens": 507915161.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.22131451964378357,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 873
    },
    {
      "clip_ratio/high_max": 0.0015606476281391224,
      "clip_ratio/high_mean": 0.00042545598171273014,
      "clip_ratio/low_mean": 0.00034125207560009585,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007667080399187398,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3050.0,
      "completions/mean_length": 680.0346069335938,
      "completions/mean_terminated_length": 577.9483032226562,
      "completions/min_length": 90.0,
      "completions/min_terminated_length": 90.0,
      "epoch": 8.167979002624673,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0118,
      "num_tokens": 508510536.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.21319912374019623,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 874
    },
    {
      "clip_ratio/high_max": 0.0017300440740655176,
      "clip_ratio/high_mean": 0.0005284130988911784,
      "clip_ratio/low_mean": 0.0004068345499490533,
      "clip_ratio/low_min": 9.404152478964534e-06,
      "clip_ratio/region_mean": 0.0009352476290587219,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3966.0,
      "completions/mean_length": 580.1183471679688,
      "completions/mean_terminated_length": 536.4180908203125,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 8.177311169437154,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": -0.0019,
      "num_tokens": 509065762.0,
      "reward": 0.551339328289032,
      "reward_std": 0.23045022785663605,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 875
    },
    {
      "clip_ratio/high_max": 0.0018924691394204274,
      "clip_ratio/high_mean": 0.0005723708322875609,
      "clip_ratio/low_mean": 0.0003268855334681575,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008992563516585506,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3051.0,
      "completions/mean_length": 545.927490234375,
      "completions/mean_terminated_length": 501.8022766113281,
      "completions/min_length": 27.0,
      "completions/min_terminated_length": 27.0,
      "epoch": 8.186643336249636,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0035,
      "num_tokens": 509602385.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.1925356090068817,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 876
    },
    {
      "clip_ratio/high_max": 0.0017362269009026932,
      "clip_ratio/high_mean": 0.0005923977596467012,
      "clip_ratio/low_mean": 0.00032261033845770726,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009150081050393055,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3127.0,
      "completions/mean_length": 588.0670166015625,
      "completions/mean_terminated_length": 544.465576171875,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "epoch": 8.195975503062117,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": -0.006,
      "num_tokens": 510183957.0,
      "reward": 0.5625,
      "reward_std": 0.2516414523124695,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 877
    },
    {
      "clip_ratio/high_max": 0.0015896023887762567,
      "clip_ratio/high_mean": 0.0004748531939640088,
      "clip_ratio/low_mean": 0.00027660682280838955,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007514600101785618,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3736.0,
      "completions/mean_length": 615.380615234375,
      "completions/mean_terminated_length": 564.1370239257812,
      "completions/min_length": 29.0,
      "completions/min_terminated_length": 29.0,
      "epoch": 8.2053076698746,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0083,
      "num_tokens": 510775410.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.20804892480373383,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796950817108154,
      "step": 878
    },
    {
      "clip_ratio/high_max": 0.00159483886363887,
      "clip_ratio/high_mean": 0.0004746732116700514,
      "clip_ratio/low_mean": 0.00036302139142208034,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008376946052521816,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3666.0,
      "completions/mean_length": 620.8092041015625,
      "completions/mean_terminated_length": 565.6473999023438,
      "completions/min_length": 75.0,
      "completions/min_terminated_length": 75.0,
      "epoch": 8.21463983668708,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.009,
      "num_tokens": 511366687.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.2101939469575882,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 879
    },
    {
      "clip_ratio/high_max": 0.001336234889095067,
      "clip_ratio/high_mean": 0.0004071303610544419,
      "clip_ratio/low_mean": 0.00035937676511821337,
      "clip_ratio/low_min": 1.193659318232676e-05,
      "clip_ratio/region_mean": 0.0007665071143492241,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3234.0,
      "completions/mean_length": 662.5580444335938,
      "completions/mean_terminated_length": 608.0589599609375,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 8.223972003499563,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0062,
      "num_tokens": 511992147.0,
      "reward": 0.4933035969734192,
      "reward_std": 0.2275192141532898,
      "rewards/verify_math_reward/mean": 0.4933035671710968,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 880
    },
    {
      "clip_ratio/high_max": 0.0013866669723938685,
      "clip_ratio/high_mean": 0.0004217528789922653,
      "clip_ratio/low_mean": 0.00035159288358954655,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007733457623544382,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2742.0,
      "completions/mean_length": 579.2767944335938,
      "completions/mean_terminated_length": 539.5846557617188,
      "completions/min_length": 103.0,
      "completions/min_terminated_length": 103.0,
      "epoch": 8.233304170312044,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0022,
      "num_tokens": 512565643.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.20963124930858612,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 881
    },
    {
      "clip_ratio/high_max": 0.0015374592385342112,
      "clip_ratio/high_mean": 0.00041051021048588154,
      "clip_ratio/low_mean": 0.0003006694947202959,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007111797053767077,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2941.0,
      "completions/mean_length": 630.0580444335938,
      "completions/mean_terminated_length": 571.0465698242188,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 8.242636337124527,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": -0.0128,
      "num_tokens": 513172247.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.1846049576997757,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 882
    },
    {
      "clip_ratio/high_max": 0.001493399352511915,
      "clip_ratio/high_mean": 0.00046457714313419274,
      "clip_ratio/low_mean": 0.0003219285948716788,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007865057505114237,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3827.0,
      "completions/mean_length": 592.3683471679688,
      "completions/mean_terminated_length": 540.7859497070312,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 8.251968503937007,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0062,
      "num_tokens": 513739473.0,
      "reward": 0.5234375,
      "reward_std": 0.20418290793895721,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 883
    },
    {
      "clip_ratio/high_max": 0.0017472907547926297,
      "clip_ratio/high_mean": 0.000569048528291205,
      "clip_ratio/low_mean": 0.00032808198898237606,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008971305105660576,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2687.0,
      "completions/mean_length": 614.0592041015625,
      "completions/mean_terminated_length": 558.790283203125,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 8.26130067074949,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0027,
      "num_tokens": 514315126.0,
      "reward": 0.546875,
      "reward_std": 0.23694662749767303,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 884
    },
    {
      "clip_ratio/high_max": 0.001324815347288677,
      "clip_ratio/high_mean": 0.00040013637385527545,
      "clip_ratio/low_mean": 0.00033200555776602414,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007321419179788791,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3376.0,
      "completions/mean_length": 603.4777221679688,
      "completions/mean_terminated_length": 556.06787109375,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "epoch": 8.27063283756197,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.0027,
      "num_tokens": 514904130.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.1940016895532608,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689781665802,
      "step": 885
    },
    {
      "clip_ratio/high_max": 0.00172890922294755,
      "clip_ratio/high_mean": 0.0005497201705111365,
      "clip_ratio/low_mean": 0.0003320003434055252,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000881720520737872,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3352.0,
      "completions/mean_length": 633.2824096679688,
      "completions/mean_terminated_length": 578.318603515625,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 8.279965004374453,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0013,
      "num_tokens": 515504991.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.22721359133720398,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943605065345764,
      "step": 886
    },
    {
      "clip_ratio/high_max": 0.0017464477114117472,
      "clip_ratio/high_mean": 0.0005554093470436783,
      "clip_ratio/low_mean": 0.000385710791533711,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009411201281182002,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3482.0,
      "completions/mean_length": 611.4553833007812,
      "completions/mean_terminated_length": 556.1451416015625,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 8.289297171186934,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0111,
      "num_tokens": 516080047.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.2399599701166153,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 887
    },
    {
      "clip_ratio/high_max": 0.0013682266153409728,
      "clip_ratio/high_mean": 0.00042073265478848043,
      "clip_ratio/low_mean": 0.00032234091304417234,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007430735649904818,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2794.0,
      "completions/mean_length": 623.6864013671875,
      "completions/mean_terminated_length": 560.5534057617188,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 8.298629337999417,
      "grad_norm": 0.11328125,
      "learning_rate": 1e-06,
      "loss": -0.0049,
      "num_tokens": 516663334.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.19422556459903717,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689778685569763,
      "step": 888
    },
    {
      "clip_ratio/high_max": 0.0017015608918882208,
      "clip_ratio/high_mean": 0.0005660527640429791,
      "clip_ratio/low_mean": 0.00033774187238577724,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009037946379066852,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2412.0,
      "completions/mean_length": 571.3158569335938,
      "completions/mean_terminated_length": 547.553955078125,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 8.307961504811898,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0025,
      "num_tokens": 517237145.0,
      "reward": 0.629464328289032,
      "reward_std": 0.22267001867294312,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 889
    },
    {
      "clip_ratio/high_max": 0.001672685058110801,
      "clip_ratio/high_mean": 0.0005197747411784803,
      "clip_ratio/low_mean": 0.00034178967371190083,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008615644155725022,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4022.0,
      "completions/mean_length": 606.5770263671875,
      "completions/mean_terminated_length": 575.1408081054688,
      "completions/min_length": 97.0,
      "completions/min_terminated_length": 97.0,
      "epoch": 8.31729367162438,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0063,
      "num_tokens": 517841462.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.20351210236549377,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689778685569763,
      "step": 890
    },
    {
      "clip_ratio/high_max": 0.0017001188316498883,
      "clip_ratio/high_mean": 0.0004989069307157479,
      "clip_ratio/low_mean": 0.0003887211655637657,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008876280899130506,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3648.0,
      "completions/mean_length": 647.0201416015625,
      "completions/mean_terminated_length": 568.2762451171875,
      "completions/min_length": 73.0,
      "completions/min_terminated_length": 73.0,
      "epoch": 8.326625838436861,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0004,
      "num_tokens": 518435216.0,
      "reward": 0.5234375,
      "reward_std": 0.2187972068786621,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 891
    },
    {
      "clip_ratio/high_max": 0.0013056448278803146,
      "clip_ratio/high_mean": 0.0003652055950169597,
      "clip_ratio/low_mean": 0.00026464711004337005,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000629852710517298,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3523.0,
      "completions/mean_length": 560.3660888671875,
      "completions/mean_terminated_length": 520.4605102539062,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 8.335958005249344,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": 0.0142,
      "num_tokens": 518984328.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.16578476130962372,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756613254547,
      "step": 892
    },
    {
      "clip_ratio/high_max": 0.0015380546774395043,
      "clip_ratio/high_mean": 0.0004793887990217627,
      "clip_ratio/low_mean": 0.0004007861812169722,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008801749663689407,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2718.0,
      "completions/mean_length": 621.4654541015625,
      "completions/mean_terminated_length": 554.267333984375,
      "completions/min_length": 75.0,
      "completions/min_terminated_length": 75.0,
      "epoch": 8.345290172061826,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0033,
      "num_tokens": 519552665.0,
      "reward": 0.5390625,
      "reward_std": 0.2326594591140747,
      "rewards/verify_math_reward/mean": 0.5390625,
      "rewards/verify_math_reward/std": 0.4987502098083496,
      "step": 893
    },
    {
      "clip_ratio/high_max": 0.001598576205651625,
      "clip_ratio/high_mean": 0.0004480711572796281,
      "clip_ratio/low_mean": 0.00033639293883425125,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007844641063456947,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3772.0,
      "completions/mean_length": 589.677490234375,
      "completions/mean_terminated_length": 534.0215454101562,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 8.354622338874307,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 520117168.0,
      "reward": 0.559151828289032,
      "reward_std": 0.1829584687948227,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 894
    },
    {
      "clip_ratio/high_max": 0.0017098172429541592,
      "clip_ratio/high_mean": 0.0004787050828554129,
      "clip_ratio/low_mean": 0.0003222945865672955,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008009996477085224,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3288.0,
      "completions/mean_length": 630.203125,
      "completions/mean_terminated_length": 587.1254272460938,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 8.36395450568679,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0095,
      "num_tokens": 520720246.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.23247580230236053,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 895
    },
    {
      "clip_ratio/high_max": 0.001812557320590713,
      "clip_ratio/high_mean": 0.0005399327089889994,
      "clip_ratio/low_mean": 0.00036290496541369066,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009028376880451106,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2805.0,
      "completions/mean_length": 595.1596069335938,
      "completions/mean_terminated_length": 559.6380615234375,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 8.37328667249927,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0006,
      "num_tokens": 521300029.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.22007529437541962,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 896
    },
    {
      "clip_ratio/high_max": 0.0017018866819853429,
      "clip_ratio/high_mean": 0.0005414952763658221,
      "clip_ratio/low_mean": 0.00036395597271621227,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009054512547663762,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3087.0,
      "completions/mean_length": 579.1640625,
      "completions/mean_terminated_length": 523.3412475585938,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 8.382618839311753,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0156,
      "num_tokens": 521862336.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.23157496750354767,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 897
    },
    {
      "clip_ratio/high_max": 0.0017398423597114743,
      "clip_ratio/high_mean": 0.0004925803399373763,
      "clip_ratio/low_mean": 0.0002706510886127944,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007632314345755731,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3169.0,
      "completions/mean_length": 619.138427734375,
      "completions/mean_terminated_length": 543.81298828125,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 8.391951006124234,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0002,
      "num_tokens": 522416276.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.17652484774589539,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161848425865173,
      "step": 898
    },
    {
      "clip_ratio/high_max": 0.0018027021014859201,
      "clip_ratio/high_mean": 0.0005538426175917266,
      "clip_ratio/low_mean": 0.000342303311242631,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008961459388956428,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4075.0,
      "completions/mean_length": 581.357177734375,
      "completions/mean_terminated_length": 517.4545288085938,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 8.401283172936717,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": -0.0162,
      "num_tokens": 522963740.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.194911390542984,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 899
    },
    {
      "clip_ratio/high_max": 0.001807417580494075,
      "clip_ratio/high_mean": 0.0005836440525399667,
      "clip_ratio/low_mean": 0.00032918493911893165,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009128289784712251,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3649.0,
      "completions/mean_length": 598.7299194335938,
      "completions/mean_terminated_length": 539.18505859375,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 8.410615339749198,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0139,
      "num_tokens": 523519162.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.24081578850746155,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924395978450775,
      "step": 900
    },
    {
      "clip_ratio/high_max": 0.001764618269589846,
      "clip_ratio/high_mean": 0.0006486259676421469,
      "clip_ratio/low_mean": 0.00039002792368592054,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001038653904288367,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3339.0,
      "completions/mean_length": 594.5267944335938,
      "completions/mean_terminated_length": 566.9561767578125,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 8.41994750656168,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": -0.006,
      "num_tokens": 524106306.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.27230456471443176,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514806270599365,
      "step": 901
    },
    {
      "clip_ratio/high_max": 0.0017868690065370174,
      "clip_ratio/high_mean": 0.0005425563986136694,
      "clip_ratio/low_mean": 0.00028931020460731816,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008318666150444187,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3925.0,
      "completions/mean_length": 599.1808471679688,
      "completions/mean_terminated_length": 535.6022338867188,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 8.429279673374161,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0081,
      "num_tokens": 524658764.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.2141416221857071,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4807341694831848,
      "step": 902
    },
    {
      "clip_ratio/high_max": 0.0019628958307293942,
      "clip_ratio/high_mean": 0.0005937970076956844,
      "clip_ratio/low_mean": 0.00027018056778160826,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008639775796837057,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2488.0,
      "completions/mean_length": 624.4788208007812,
      "completions/mean_terminated_length": 541.1622924804688,
      "completions/min_length": 84.0,
      "completions/min_terminated_length": 84.0,
      "epoch": 8.438611840186644,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0016,
      "num_tokens": 525219713.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.20820976793766022,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 903
    },
    {
      "clip_ratio/high_max": 0.0018190665196016198,
      "clip_ratio/high_mean": 0.0005865290181645832,
      "clip_ratio/low_mean": 0.00030423904979670624,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008907680658012396,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3736.0,
      "completions/mean_length": 621.4375,
      "completions/mean_terminated_length": 582.2212524414062,
      "completions/min_length": 80.0,
      "completions/min_terminated_length": 80.0,
      "epoch": 8.447944006999125,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0116,
      "num_tokens": 525832361.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.2127137929201126,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 904
    },
    {
      "clip_ratio/high_max": 0.0014852175208943663,
      "clip_ratio/high_mean": 0.0004753192365569703,
      "clip_ratio/low_mean": 0.0003577102679628297,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008330295122505049,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3497.0,
      "completions/mean_length": 591.8460083007812,
      "completions/mean_terminated_length": 564.2542114257812,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 8.457276173811607,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.0043,
      "num_tokens": 526429847.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.22135479748249054,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 905
    },
    {
      "clip_ratio/high_max": 0.0015947114407026675,
      "clip_ratio/high_mean": 0.0005013735790271312,
      "clip_ratio/low_mean": 0.0003463989635292819,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008477725486955023,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3874.0,
      "completions/mean_length": 601.2801513671875,
      "completions/mean_terminated_length": 545.8084106445312,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 8.466608340624088,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0073,
      "num_tokens": 526997170.0,
      "reward": 0.582589328289032,
      "reward_std": 0.2290155589580536,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.493407279253006,
      "step": 906
    },
    {
      "clip_ratio/high_max": 0.0015031150460345089,
      "clip_ratio/high_mean": 0.0004442498941443773,
      "clip_ratio/low_mean": 0.00031960154296939436,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007638514266545826,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4062.0,
      "completions/mean_length": 638.3671875,
      "completions/mean_terminated_length": 571.4959716796875,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "epoch": 8.47594050743657,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0051,
      "num_tokens": 527584963.0,
      "reward": 0.515625,
      "reward_std": 0.21602025628089905,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 907
    },
    {
      "clip_ratio/high_max": 0.0016060644757089904,
      "clip_ratio/high_mean": 0.00042741316269712115,
      "clip_ratio/low_mean": 0.0004014176520286128,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008288308108603815,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3937.0,
      "completions/mean_length": 672.1986694335938,
      "completions/mean_terminated_length": 598.0227661132812,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 8.485272674249051,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0025,
      "num_tokens": 528192661.0,
      "reward": 0.527901828289032,
      "reward_std": 0.20783139765262604,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 908
    },
    {
      "clip_ratio/high_max": 0.0017307647158304462,
      "clip_ratio/high_mean": 0.0005103231901557592,
      "clip_ratio/low_mean": 0.0003590532381849698,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008693764434610785,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2978.0,
      "completions/mean_length": 589.1830444335938,
      "completions/mean_terminated_length": 545.5955200195312,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 8.494604841061534,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0098,
      "num_tokens": 528761569.0,
      "reward": 0.5546875,
      "reward_std": 0.21060903370380402,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 909
    },
    {
      "clip_ratio/high_max": 0.0014862043444736628,
      "clip_ratio/high_mean": 0.000418830484704813,
      "clip_ratio/low_mean": 0.0003197139348003475,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007385444196188473,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3903.0,
      "completions/mean_length": 613.7210083007812,
      "completions/mean_terminated_length": 574.4176025390625,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 8.503937007874015,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": -0.003,
      "num_tokens": 529370567.0,
      "reward": 0.551339328289032,
      "reward_std": 0.1918955147266388,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 910
    },
    {
      "clip_ratio/high_max": 0.0019107366424577776,
      "clip_ratio/high_mean": 0.0005375444156925369,
      "clip_ratio/low_mean": 0.0003362418125334443,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008737862326597678,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2913.0,
      "completions/mean_length": 535.8973388671875,
      "completions/mean_terminated_length": 503.8243408203125,
      "completions/min_length": 88.0,
      "completions/min_terminated_length": 88.0,
      "epoch": 8.513269174686497,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": 0.0118,
      "num_tokens": 529906819.0,
      "reward": 0.637276828289032,
      "reward_std": 0.21214716136455536,
      "rewards/verify_math_reward/mean": 0.6372767686843872,
      "rewards/verify_math_reward/std": 0.481054425239563,
      "step": 911
    },
    {
      "clip_ratio/high_max": 0.001241295964064193,
      "clip_ratio/high_mean": 0.0003295468716260075,
      "clip_ratio/low_mean": 0.0003396690580075301,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006692159340673243,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2876.0,
      "completions/mean_length": 642.2723388671875,
      "completions/mean_terminated_length": 591.4246826171875,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 8.52260134149898,
      "grad_norm": 0.107421875,
      "learning_rate": 1e-06,
      "loss": 0.0041,
      "num_tokens": 530523247.0,
      "reward": 0.4743303656578064,
      "reward_std": 0.18013623356819153,
      "rewards/verify_math_reward/mean": 0.4743303656578064,
      "rewards/verify_math_reward/std": 0.4996195435523987,
      "step": 912
    },
    {
      "clip_ratio/high_max": 0.0017086550105887,
      "clip_ratio/high_mean": 0.0005698565296370361,
      "clip_ratio/low_mean": 0.00038646468397018907,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009563212051943992,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3678.0,
      "completions/mean_length": 650.2902221679688,
      "completions/mean_terminated_length": 575.6396484375,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "epoch": 8.531933508311461,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0207,
      "num_tokens": 531113371.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.2622414231300354,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756613254547,
      "step": 913
    },
    {
      "clip_ratio/high_max": 0.001687218486040365,
      "clip_ratio/high_mean": 0.0005116880656714784,
      "clip_ratio/low_mean": 0.00029640564264354907,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008080937104750774,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3667.0,
      "completions/mean_length": 553.7745971679688,
      "completions/mean_terminated_length": 517.8331298828125,
      "completions/min_length": 85.0,
      "completions/min_terminated_length": 85.0,
      "epoch": 8.541265675123944,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0119,
      "num_tokens": 531666953.0,
      "reward": 0.598214328289032,
      "reward_std": 0.2105737179517746,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 914
    },
    {
      "clip_ratio/high_max": 0.0017619947393541224,
      "clip_ratio/high_mean": 0.0005559896108024986,
      "clip_ratio/low_mean": 0.00040431408615404507,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009603036887710914,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1687.0,
      "completions/mean_length": 547.2835083007812,
      "completions/mean_terminated_length": 523.3595581054688,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 8.550597841936424,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": 0.0063,
      "num_tokens": 532221319.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.24525383114814758,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 915
    },
    {
      "clip_ratio/high_max": 0.0018598817950987723,
      "clip_ratio/high_mean": 0.000556495856926631,
      "clip_ratio/low_mean": 0.0002807374728490686,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008372333286388312,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3952.0,
      "completions/mean_length": 689.4676513671875,
      "completions/mean_terminated_length": 639.3148193359375,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 8.559930008748907,
      "grad_norm": 0.11328125,
      "learning_rate": 1e-06,
      "loss": -0.0036,
      "num_tokens": 532884850.0,
      "reward": 0.463169664144516,
      "reward_std": 0.20293870568275452,
      "rewards/verify_math_reward/mean": 0.4631696343421936,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 916
    },
    {
      "clip_ratio/high_max": 0.0014546530601364793,
      "clip_ratio/high_mean": 0.0003646654126896465,
      "clip_ratio/low_mean": 0.0003260940266045509,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006907594361109659,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3705.0,
      "completions/mean_length": 644.5960083007812,
      "completions/mean_terminated_length": 569.8220825195312,
      "completions/min_length": 6.0,
      "completions/min_terminated_length": 6.0,
      "epoch": 8.569262175561388,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": 0.007,
      "num_tokens": 533472992.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.18359534442424774,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 917
    },
    {
      "clip_ratio/high_max": 0.0015795235085533932,
      "clip_ratio/high_mean": 0.00046314455744322913,
      "clip_ratio/low_mean": 0.0003992133807741993,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008623579442428309,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2971.0,
      "completions/mean_length": 577.5100708007812,
      "completions/mean_terminated_length": 553.7899169921875,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 8.57859434237387,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.0053,
      "num_tokens": 534059841.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.2202172726392746,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756911277771,
      "step": 918
    },
    {
      "clip_ratio/high_max": 0.0017360732308588922,
      "clip_ratio/high_mean": 0.0004725495975890226,
      "clip_ratio/low_mean": 0.0003561232105084855,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008286728188977577,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4052.0,
      "completions/mean_length": 590.1506958007812,
      "completions/mean_terminated_length": 534.5022583007812,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 8.587926509186351,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0038,
      "num_tokens": 534626792.0,
      "reward": 0.578125,
      "reward_std": 0.21685144305229187,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 919
    },
    {
      "clip_ratio/high_max": 0.001625804466129921,
      "clip_ratio/high_mean": 0.0004773313697796766,
      "clip_ratio/low_mean": 0.0003072223100843985,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007845536779313989,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3969.0,
      "completions/mean_length": 633.7678833007812,
      "completions/mean_terminated_length": 578.8118286132812,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 8.597258675998834,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": -0.0061,
      "num_tokens": 535220032.0,
      "reward": 0.5055803656578064,
      "reward_std": 0.2061368077993393,
      "rewards/verify_math_reward/mean": 0.5055803656578064,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 920
    },
    {
      "clip_ratio/high_max": 0.0016396561859437497,
      "clip_ratio/high_mean": 0.00047457321988986223,
      "clip_ratio/low_mean": 0.00030810346515863785,
      "clip_ratio/low_min": 6.510416824312415e-06,
      "clip_ratio/region_mean": 0.0007826766786820372,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3387.0,
      "completions/mean_length": 604.4319458007812,
      "completions/mean_terminated_length": 540.9488525390625,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 8.606590842811315,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": 0.0067,
      "num_tokens": 535778747.0,
      "reward": 0.574776828289032,
      "reward_std": 0.20955273509025574,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 921
    },
    {
      "clip_ratio/high_max": 0.001730506738567783,
      "clip_ratio/high_mean": 0.0004712760774054914,
      "clip_ratio/low_mean": 0.00031183343355678517,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007831095163055579,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2737.0,
      "completions/mean_length": 622.1160888671875,
      "completions/mean_terminated_length": 570.9716796875,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 8.615923009623797,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0122,
      "num_tokens": 536367027.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.21117106080055237,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 922
    },
    {
      "clip_ratio/high_max": 0.0016254750898951897,
      "clip_ratio/high_mean": 0.0004993629991076887,
      "clip_ratio/low_mean": 0.0003328687778321182,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008322317871716223,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2945.0,
      "completions/mean_length": 619.6920166015625,
      "completions/mean_terminated_length": 564.512451171875,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 8.625255176436278,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0072,
      "num_tokens": 536961111.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.21432778239250183,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 923
    },
    {
      "clip_ratio/high_max": 0.001721602246107068,
      "clip_ratio/high_mean": 0.0005219461934302672,
      "clip_ratio/low_mean": 0.00037469376366061624,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008966399600467412,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3136.0,
      "completions/mean_length": 544.1953125,
      "completions/mean_terminated_length": 508.15667724609375,
      "completions/min_length": 12.0,
      "completions/min_terminated_length": 12.0,
      "epoch": 8.63458734324876,
      "grad_norm": 0.1455078125,
      "learning_rate": 1e-06,
      "loss": -0.0005,
      "num_tokens": 537504166.0,
      "reward": 0.5256696939468384,
      "reward_std": 0.2325085699558258,
      "rewards/verify_math_reward/mean": 0.5256696343421936,
      "rewards/verify_math_reward/std": 0.4996195435523987,
      "step": 924
    },
    {
      "clip_ratio/high_max": 0.0015194864399745711,
      "clip_ratio/high_mean": 0.0003873891405419272,
      "clip_ratio/low_mean": 0.0002880939252918324,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006754830624231545,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3740.0,
      "completions/mean_length": 638.0569458007812,
      "completions/mean_terminated_length": 567.1651611328125,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 8.643919510061242,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": -0.0067,
      "num_tokens": 538094113.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.17461372911930084,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.4999600946903229,
      "step": 925
    },
    {
      "clip_ratio/high_max": 0.0017375241059198743,
      "clip_ratio/high_mean": 0.0005021859069529455,
      "clip_ratio/low_mean": 0.0003631916688391357,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008653775994389434,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3134.0,
      "completions/mean_length": 587.3660888671875,
      "completions/mean_terminated_length": 547.7652587890625,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 8.653251676873724,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0153,
      "num_tokens": 538667313.0,
      "reward": 0.559151828289032,
      "reward_std": 0.20756672322750092,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 926
    },
    {
      "clip_ratio/high_max": 0.0016017565640140674,
      "clip_ratio/high_mean": 0.0005030020404319657,
      "clip_ratio/low_mean": 0.0002697615398119524,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007727635820629075,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2242.0,
      "completions/mean_length": 576.4017944335938,
      "completions/mean_terminated_length": 516.4767456054688,
      "completions/min_length": 57.0,
      "completions/min_terminated_length": 57.0,
      "epoch": 8.662583843686207,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0042,
      "num_tokens": 539213545.0,
      "reward": 0.5703125,
      "reward_std": 0.18716463446617126,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 927
    },
    {
      "clip_ratio/high_max": 0.0015024595559225418,
      "clip_ratio/high_mean": 0.00048641150249295606,
      "clip_ratio/low_mean": 0.0004322054810472764,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009186169900203822,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2956.0,
      "completions/mean_length": 636.2377319335938,
      "completions/mean_terminated_length": 565.3086547851562,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "epoch": 8.671916010498688,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0102,
      "num_tokens": 539798990.0,
      "reward": 0.527901828289032,
      "reward_std": 0.23334342241287231,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 928
    },
    {
      "clip_ratio/high_max": 0.0017354858355247416,
      "clip_ratio/high_mean": 0.0004994459477529745,
      "clip_ratio/low_mean": 0.00038305372936520143,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008824996702969656,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3582.0,
      "completions/mean_length": 636.1439819335938,
      "completions/mean_terminated_length": 565.2130126953125,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 8.68124817731117,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0171,
      "num_tokens": 540383943.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.21733295917510986,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.49702703952789307,
      "step": 929
    },
    {
      "clip_ratio/high_max": 0.001857759898484801,
      "clip_ratio/high_mean": 0.0005302980175656558,
      "clip_ratio/low_mean": 0.0004215425344682444,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009518405659036944,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2731.0,
      "completions/mean_length": 608.7756958007812,
      "completions/mean_terminated_length": 549.40185546875,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 8.690580344123651,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": -0.0039,
      "num_tokens": 540954278.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.23435191810131073,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 930
    },
    {
      "clip_ratio/high_max": 0.0019121235836792039,
      "clip_ratio/high_mean": 0.0005733485695600393,
      "clip_ratio/low_mean": 0.0002883373324493732,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000861685887684871,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2975.0,
      "completions/mean_length": 681.7667846679688,
      "completions/mean_terminated_length": 611.7711181640625,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 8.699912510936134,
      "grad_norm": 0.11767578125,
      "learning_rate": 1e-06,
      "loss": -0.0076,
      "num_tokens": 541584853.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.214814230799675,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 931
    },
    {
      "clip_ratio/high_max": 0.0017335543361696182,
      "clip_ratio/high_mean": 0.0005107215254156472,
      "clip_ratio/low_mean": 0.00042112849746445136,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009318500187873724,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2328.0,
      "completions/mean_length": 611.8170166015625,
      "completions/mean_terminated_length": 552.4949340820312,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 8.709244677748615,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": -0.0003,
      "num_tokens": 542165209.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.21086977422237396,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 932
    },
    {
      "clip_ratio/high_max": 0.0015037285702419467,
      "clip_ratio/high_mean": 0.0003934235918450213,
      "clip_ratio/low_mean": 0.00019559146517167392,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0005890150528102822,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2591.0,
      "completions/mean_length": 662.232177734375,
      "completions/mean_terminated_length": 599.7999877929688,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 8.718576844561097,
      "grad_norm": 0.1142578125,
      "learning_rate": 1e-06,
      "loss": 0.002,
      "num_tokens": 542781721.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.1543617695569992,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 933
    },
    {
      "clip_ratio/high_max": 0.0014332880964502692,
      "clip_ratio/high_mean": 0.000394102932318674,
      "clip_ratio/low_mean": 0.0003036897494439472,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006977926789204503,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3637.0,
      "completions/mean_length": 633.5926513671875,
      "completions/mean_terminated_length": 590.5570678710938,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 8.727909011373578,
      "grad_norm": 0.1142578125,
      "learning_rate": 1e-06,
      "loss": 0.0142,
      "num_tokens": 543390532.0,
      "reward": 0.5011160969734192,
      "reward_std": 0.1767512857913971,
      "rewards/verify_math_reward/mean": 0.5011160969734192,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 934
    },
    {
      "clip_ratio/high_max": 0.0018787748595059384,
      "clip_ratio/high_mean": 0.0005562072410612018,
      "clip_ratio/low_mean": 0.000399612187038656,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009558194296914735,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2410.0,
      "completions/mean_length": 611.9888916015625,
      "completions/mean_terminated_length": 544.6074829101562,
      "completions/min_length": 8.0,
      "completions/min_terminated_length": 8.0,
      "epoch": 8.73724117818606,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0181,
      "num_tokens": 543948706.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.21763533353805542,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 935
    },
    {
      "clip_ratio/high_max": 0.0017085592990042642,
      "clip_ratio/high_mean": 0.0005232095326164199,
      "clip_ratio/low_mean": 0.00034663820576952276,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008698477295183693,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3782.0,
      "completions/mean_length": 607.091552734375,
      "completions/mean_terminated_length": 551.7120361328125,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 8.746573344998541,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0103,
      "num_tokens": 544523428.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.1906581073999405,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 936
    },
    {
      "clip_ratio/high_max": 0.0014151353498164099,
      "clip_ratio/high_mean": 0.0003932183233246178,
      "clip_ratio/low_mean": 0.00035517708670340653,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007483954195777187,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3904.0,
      "completions/mean_length": 596.171875,
      "completions/mean_terminated_length": 560.66064453125,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 8.755905511811024,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0133,
      "num_tokens": 545113950.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.2212025374174118,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924395978450775,
      "step": 937
    },
    {
      "clip_ratio/high_max": 0.0016391848967032274,
      "clip_ratio/high_mean": 0.0005453678943467821,
      "clip_ratio/low_mean": 0.0004193331365058839,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009647010410844814,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3744.0,
      "completions/mean_length": 668.4866333007812,
      "completions/mean_terminated_length": 582.2105102539062,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 8.765237678623505,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 545709474.0,
      "reward": 0.5390625,
      "reward_std": 0.23616454005241394,
      "rewards/verify_math_reward/mean": 0.5390625,
      "rewards/verify_math_reward/std": 0.4987502098083496,
      "step": 938
    },
    {
      "clip_ratio/high_max": 0.0018827369021892082,
      "clip_ratio/high_mean": 0.0005811306814393902,
      "clip_ratio/low_mean": 0.00035539788541427697,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000936528564125183,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3390.0,
      "completions/mean_length": 523.2377319335938,
      "completions/mean_terminated_length": 491.0506896972656,
      "completions/min_length": 64.0,
      "completions/min_terminated_length": 64.0,
      "epoch": 8.774569845435988,
      "grad_norm": 0.15234375,
      "learning_rate": 1e-06,
      "loss": 0.0044,
      "num_tokens": 546229503.0,
      "reward": 0.621651828289032,
      "reward_std": 0.22936254739761353,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 939
    },
    {
      "clip_ratio/high_max": 0.0014341303749461076,
      "clip_ratio/high_mean": 0.0003922824550954829,
      "clip_ratio/low_mean": 0.0003290212607680587,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007213037156361679,
      "completions/clipped_ratio": 0.0323660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3679.0,
      "completions/mean_length": 675.1517944335938,
      "completions/mean_terminated_length": 560.7289428710938,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 8.783902012248468,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0016,
      "num_tokens": 546809335.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.17394223809242249,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763562679291,
      "step": 940
    },
    {
      "clip_ratio/high_max": 0.001455961568353814,
      "clip_ratio/high_mean": 0.00040871775604500726,
      "clip_ratio/low_mean": 0.00029301418430804915,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007017319389888144,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2651.0,
      "completions/mean_length": 573.099365234375,
      "completions/mean_terminated_length": 541.3615112304688,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 8.793234179060951,
      "grad_norm": 0.11572265625,
      "learning_rate": 1e-06,
      "loss": -0.0069,
      "num_tokens": 547369128.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.205833300948143,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 941
    },
    {
      "clip_ratio/high_max": 0.001447814769562683,
      "clip_ratio/high_mean": 0.0004180887731308758,
      "clip_ratio/low_mean": 0.00031519942911018006,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007332881918955536,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3975.0,
      "completions/mean_length": 619.9967041015625,
      "completions/mean_terminated_length": 548.734619140625,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 8.802566345873432,
      "grad_norm": 0.115234375,
      "learning_rate": 1e-06,
      "loss": -0.0099,
      "num_tokens": 547936013.0,
      "reward": 0.543526828289032,
      "reward_std": 0.1766757071018219,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 942
    },
    {
      "clip_ratio/high_max": 0.0017402688117726939,
      "clip_ratio/high_mean": 0.0005268613151656609,
      "clip_ratio/low_mean": 0.0003360545338182419,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008629158592157182,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3254.0,
      "completions/mean_length": 568.5,
      "completions/mean_terminated_length": 544.7191162109375,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 8.811898512685914,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0036,
      "num_tokens": 548510949.0,
      "reward": 0.59375,
      "reward_std": 0.21969692409038544,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 943
    },
    {
      "clip_ratio/high_max": 0.001513290316779603,
      "clip_ratio/high_mean": 0.0004044888418093251,
      "clip_ratio/low_mean": 0.0002366639280353411,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006411527729142108,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2697.0,
      "completions/mean_length": 634.3314819335938,
      "completions/mean_terminated_length": 579.3843383789062,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 8.821230679498395,
      "grad_norm": 0.1044921875,
      "learning_rate": 1e-06,
      "loss": -0.0125,
      "num_tokens": 549107558.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.17803147435188293,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 944
    },
    {
      "clip_ratio/high_max": 0.0013947562583780382,
      "clip_ratio/high_mean": 0.00038034964654798387,
      "clip_ratio/low_mean": 0.0002776464652924915,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000657996119116433,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4080.0,
      "completions/mean_length": 652.15625,
      "completions/mean_terminated_length": 577.546142578125,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 8.830562846310878,
      "grad_norm": 0.11474609375,
      "learning_rate": 1e-06,
      "loss": 0.0109,
      "num_tokens": 549697938.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.18644829094409943,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 945
    },
    {
      "clip_ratio/high_max": 0.0016548984131077304,
      "clip_ratio/high_mean": 0.0004959760540259595,
      "clip_ratio/low_mean": 0.0003435495328858451,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008395255986215489,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2153.0,
      "completions/mean_length": 575.107177734375,
      "completions/mean_terminated_length": 519.219970703125,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 8.83989501312336,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0065,
      "num_tokens": 550239634.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.20339125394821167,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 946
    },
    {
      "clip_ratio/high_max": 0.0016845199843373848,
      "clip_ratio/high_mean": 0.0005099602622067323,
      "clip_ratio/low_mean": 0.0003712894717864401,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008812497280814569,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4045.0,
      "completions/mean_length": 640.068115234375,
      "completions/mean_terminated_length": 581.22705078125,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 8.849227179935841,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0171,
      "num_tokens": 550834215.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.21740712225437164,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 947
    },
    {
      "clip_ratio/high_max": 0.0016339108569809468,
      "clip_ratio/high_mean": 0.0005902967563997663,
      "clip_ratio/low_mean": 0.00034587329855639837,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009361700476802071,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3251.0,
      "completions/mean_length": 654.1239013671875,
      "completions/mean_terminated_length": 595.5221557617188,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 8.858559346748324,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0037,
      "num_tokens": 551448990.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.2292533665895462,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973995089530945,
      "step": 948
    },
    {
      "clip_ratio/high_max": 0.0015780063531565247,
      "clip_ratio/high_mean": 0.0004718756790680345,
      "clip_ratio/low_mean": 0.00032239909978670767,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007942747856759524,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3677.0,
      "completions/mean_length": 621.9564819335938,
      "completions/mean_terminated_length": 558.7920532226562,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 8.867891513560805,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0164,
      "num_tokens": 552027495.0,
      "reward": 0.5390625,
      "reward_std": 0.22352877259254456,
      "rewards/verify_math_reward/mean": 0.5390625,
      "rewards/verify_math_reward/std": 0.4987502098083496,
      "step": 949
    },
    {
      "clip_ratio/high_max": 0.0014388215749931987,
      "clip_ratio/high_mean": 0.0004559162789519178,
      "clip_ratio/low_mean": 0.0002821906871304236,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007381069628991099,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2787.0,
      "completions/mean_length": 616.154052734375,
      "completions/mean_terminated_length": 568.9163208007812,
      "completions/min_length": 17.0,
      "completions/min_terminated_length": 17.0,
      "epoch": 8.877223680373287,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": -0.0091,
      "num_tokens": 552620473.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.21075919270515442,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 950
    },
    {
      "clip_ratio/high_max": 0.0016259537251244183,
      "clip_ratio/high_mean": 0.0005211812249399372,
      "clip_ratio/low_mean": 0.0003111157900548278,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008322970115841599,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2260.0,
      "completions/mean_length": 577.6864013671875,
      "completions/mean_terminated_length": 537.976318359375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "epoch": 8.886555847185768,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0057,
      "num_tokens": 553191160.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.19238333404064178,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 951
    },
    {
      "clip_ratio/high_max": 0.001612585181646864,
      "clip_ratio/high_mean": 0.0005515304546861444,
      "clip_ratio/low_mean": 0.00037551792172507703,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009270483924410655,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3260.0,
      "completions/mean_length": 585.6529541015625,
      "completions/mean_terminated_length": 565.9539794921875,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 8.89588801399825,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0,
      "num_tokens": 553785473.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.24123060703277588,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 952
    },
    {
      "clip_ratio/high_max": 0.0016049309633672237,
      "clip_ratio/high_mean": 0.000490850989763203,
      "clip_ratio/low_mean": 0.0003469750622571155,
      "clip_ratio/low_min": 1.0735142495832406e-05,
      "clip_ratio/region_mean": 0.000837826057249913,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2830.0,
      "completions/mean_length": 571.872802734375,
      "completions/mean_terminated_length": 540.1239013671875,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 8.905220180810732,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0052,
      "num_tokens": 554356279.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.21624736487865448,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 953
    },
    {
      "clip_ratio/high_max": 0.001735047599140671,
      "clip_ratio/high_mean": 0.0005252989367363625,
      "clip_ratio/low_mean": 0.0003084070613112999,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00083370600532362,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4056.0,
      "completions/mean_length": 573.0792846679688,
      "completions/mean_terminated_length": 553.309814453125,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 8.914552347623214,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.0082,
      "num_tokens": 554940510.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.20305635035037994,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 954
    },
    {
      "clip_ratio/high_max": 0.0015454856838914566,
      "clip_ratio/high_mean": 0.0005216419679072715,
      "clip_ratio/low_mean": 0.0003173057908725241,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008389477634409559,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2679.0,
      "completions/mean_length": 617.6864013671875,
      "completions/mean_terminated_length": 574.453125,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 8.923884514435695,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": 0.0184,
      "num_tokens": 555532621.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.21462947130203247,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 955
    },
    {
      "clip_ratio/high_max": 0.001508560942056647,
      "clip_ratio/high_mean": 0.00044567480642854207,
      "clip_ratio/low_mean": 0.000273632164976334,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007193069659479079,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2332.0,
      "completions/mean_length": 634.8984375,
      "completions/mean_terminated_length": 575.9693603515625,
      "completions/min_length": 47.0,
      "completions/min_terminated_length": 47.0,
      "epoch": 8.933216681248178,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0043,
      "num_tokens": 556130282.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.19155851006507874,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 956
    },
    {
      "clip_ratio/high_max": 0.0013757557690041722,
      "clip_ratio/high_mean": 0.00043258950836388976,
      "clip_ratio/low_mean": 0.00033068390450807783,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007632734059370705,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3216.0,
      "completions/mean_length": 643.578125,
      "completions/mean_terminated_length": 584.796875,
      "completions/min_length": 74.0,
      "completions/min_terminated_length": 74.0,
      "epoch": 8.942548848060659,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0049,
      "num_tokens": 556737168.0,
      "reward": 0.5033482313156128,
      "reward_std": 0.2127547711133957,
      "rewards/verify_math_reward/mean": 0.5033482313156128,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 957
    },
    {
      "clip_ratio/high_max": 0.0016217505508393515,
      "clip_ratio/high_mean": 0.000501149289902969,
      "clip_ratio/low_mean": 0.00035087403148281737,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008520233359377016,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3266.0,
      "completions/mean_length": 660.2511596679688,
      "completions/mean_terminated_length": 597.782958984375,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 8.951881014873141,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.0019,
      "num_tokens": 557358465.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.20249292254447937,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 958
    },
    {
      "clip_ratio/high_max": 0.001791343160221004,
      "clip_ratio/high_mean": 0.0005761517129485583,
      "clip_ratio/low_mean": 0.0003766217503198277,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009527734773655538,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3468.0,
      "completions/mean_length": 620.9453125,
      "completions/mean_terminated_length": 553.7371826171875,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 8.961213181685622,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0078,
      "num_tokens": 557936408.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.23052439093589783,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 959
    },
    {
      "clip_ratio/high_max": 0.0017940017096407246,
      "clip_ratio/high_mean": 0.0005707343857466185,
      "clip_ratio/low_mean": 0.00037262102762269933,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000943355388699274,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3319.0,
      "completions/mean_length": 611.4710083007812,
      "completions/mean_terminated_length": 564.169677734375,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 8.970545348498105,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0152,
      "num_tokens": 558531854.0,
      "reward": 0.5546875,
      "reward_std": 0.21853618323802948,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 960
    },
    {
      "clip_ratio/high_max": 0.0013003966305404902,
      "clip_ratio/high_mean": 0.00037684658400394255,
      "clip_ratio/low_mean": 0.0002891404033107392,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006659869865188739,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3764.0,
      "completions/mean_length": 584.8381958007812,
      "completions/mean_terminated_length": 545.2088012695312,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 8.979877515310585,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0018,
      "num_tokens": 559103469.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.18588444590568542,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 961
    },
    {
      "clip_ratio/high_max": 0.0017467885804762773,
      "clip_ratio/high_mean": 0.0005397544890684003,
      "clip_ratio/low_mean": 0.00033204105056938715,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008717955324755167,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3104.0,
      "completions/mean_length": 635.724365234375,
      "completions/mean_terminated_length": 560.7582397460938,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 8.989209682123068,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": -0.0193,
      "num_tokens": 559692526.0,
      "reward": 0.5055803656578064,
      "reward_std": 0.2199697643518448,
      "rewards/verify_math_reward/mean": 0.5055803656578064,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 962
    },
    {
      "clip_ratio/high_max": 0.0016496466269018129,
      "clip_ratio/high_mean": 0.00047832449854467995,
      "clip_ratio/low_mean": 0.00039233260883975163,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008706570988579188,
      "completions/clipped_ratio": 0.014204545454545414,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3891.0,
      "completions/mean_length": 637.0966186523438,
      "completions/mean_terminated_length": 587.2564697265625,
      "completions/min_length": 86.0,
      "completions/min_terminated_length": 86.0,
      "epoch": 8.998541848935549,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0017,
      "num_tokens": 560307979.0,
      "reward": 0.4854910969734192,
      "reward_std": 0.21729834377765656,
      "rewards/verify_math_reward/mean": 0.4854910671710968,
      "rewards/verify_math_reward/std": 0.5000686049461365,
      "step": 963
    },
    {
      "clip_ratio/high_max": 0.0015838597200854565,
      "clip_ratio/high_mean": 0.000544780214227103,
      "clip_ratio/low_mean": 0.00034177267389168264,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008865528870956041,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4056.0,
      "completions/mean_length": 635.2299194335938,
      "completions/mean_terminated_length": 604.0518188476562,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 9.009332166812483,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0011,
      "num_tokens": 560936585.0,
      "reward": 0.5167410969734192,
      "reward_std": 0.23157496750354767,
      "rewards/verify_math_reward/mean": 0.5167410969734192,
      "rewards/verify_math_reward/std": 0.4999987483024597,
      "step": 964
    },
    {
      "clip_ratio/high_max": 0.0015604611680828384,
      "clip_ratio/high_mean": 0.00045380116466731124,
      "clip_ratio/low_mean": 0.0002617700657765454,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007155712255553226,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2156.0,
      "completions/mean_length": 559.2467041015625,
      "completions/mean_terminated_length": 523.3607177734375,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 9.018664333624963,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0098,
      "num_tokens": 561486790.0,
      "reward": 0.6149553656578064,
      "reward_std": 0.18280622363090515,
      "rewards/verify_math_reward/mean": 0.6149553656578064,
      "rewards/verify_math_reward/std": 0.4868776500225067,
      "step": 965
    },
    {
      "clip_ratio/high_max": 0.0018859688234442729,
      "clip_ratio/high_mean": 0.0005737175087006108,
      "clip_ratio/low_mean": 0.0003881120976529928,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000961829596235475,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2468.0,
      "completions/mean_length": 606.96875,
      "completions/mean_terminated_length": 527.3104858398438,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 9.027996500437446,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0067,
      "num_tokens": 562045386.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.208427295088768,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 966
    },
    {
      "clip_ratio/high_max": 0.0016008473776309984,
      "clip_ratio/high_mean": 0.0004954031207944354,
      "clip_ratio/low_mean": 0.0003710629986244385,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008664661168040766,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2614.0,
      "completions/mean_length": 650.2589721679688,
      "completions/mean_terminated_length": 571.5889892578125,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 9.037328667249927,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0006,
      "num_tokens": 562636498.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.239656463265419,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.49828118085861206,
      "step": 967
    },
    {
      "clip_ratio/high_max": 0.001595270895450085,
      "clip_ratio/high_mean": 0.0004952483645865868,
      "clip_ratio/low_mean": 0.0003451965735621343,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008404449472436681,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4066.0,
      "completions/mean_length": 602.5960083007812,
      "completions/mean_terminated_length": 567.14990234375,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 9.04666083406241,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0029,
      "num_tokens": 563245408.0,
      "reward": 0.5033482313156128,
      "reward_std": 0.23548056185245514,
      "rewards/verify_math_reward/mean": 0.5033482313156128,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 968
    },
    {
      "clip_ratio/high_max": 0.001770732998920721,
      "clip_ratio/high_mean": 0.0006120005477896484,
      "clip_ratio/low_mean": 0.000363428881200889,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009754294169397326,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2194.0,
      "completions/mean_length": 598.794677734375,
      "completions/mean_terminated_length": 559.3228149414062,
      "completions/min_length": 98.0,
      "completions/min_terminated_length": 98.0,
      "epoch": 9.05599300087489,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0073,
      "num_tokens": 563825608.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.2438220977783203,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 969
    },
    {
      "clip_ratio/high_max": 0.0019437053215369815,
      "clip_ratio/high_mean": 0.0006528858348247013,
      "clip_ratio/low_mean": 0.0003233873865156056,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009762732279341435,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3323.0,
      "completions/mean_length": 558.6975708007812,
      "completions/mean_terminated_length": 518.7731323242188,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 9.065325167687373,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0132,
      "num_tokens": 564369769.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.2322482466697693,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 970
    },
    {
      "clip_ratio/high_max": 0.0016274704685201868,
      "clip_ratio/high_mean": 0.0005316773524555174,
      "clip_ratio/low_mean": 0.000334410725372436,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008660880839670426,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3861.0,
      "completions/mean_length": 652.6596069335938,
      "completions/mean_terminated_length": 594.032958984375,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 9.074657334499854,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.006,
      "num_tokens": 564985520.0,
      "reward": 0.512276828289032,
      "reward_std": 0.22646865248680115,
      "rewards/verify_math_reward/mean": 0.5122767686843872,
      "rewards/verify_math_reward/std": 0.500128448009491,
      "step": 971
    },
    {
      "clip_ratio/high_max": 0.00183391165046487,
      "clip_ratio/high_mean": 0.0005753153495788865,
      "clip_ratio/low_mean": 0.0003095582123933127,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008848735687934095,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3624.0,
      "completions/mean_length": 563.9375,
      "completions/mean_terminated_length": 540.1258544921875,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 9.083989501312336,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": -0.0083,
      "num_tokens": 565549152.0,
      "reward": 0.590401828289032,
      "reward_std": 0.23093554377555847,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 972
    },
    {
      "clip_ratio/high_max": 0.0016102681729535107,
      "clip_ratio/high_mean": 0.0005075933613625239,
      "clip_ratio/low_mean": 0.00030177611552062444,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008093694632407278,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3285.0,
      "completions/mean_length": 599.513427734375,
      "completions/mean_terminated_length": 560.0496826171875,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 9.093321668124817,
      "grad_norm": 0.11962890625,
      "learning_rate": 1e-06,
      "loss": 0.0017,
      "num_tokens": 566133380.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.188932403922081,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 973
    },
    {
      "clip_ratio/high_max": 0.0014658632471764577,
      "clip_ratio/high_mean": 0.0004125052128074458,
      "clip_ratio/low_mean": 0.00030730440084880684,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000719809624570189,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3616.0,
      "completions/mean_length": 664.513427734375,
      "completions/mean_terminated_length": 594.1640625,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 9.1026538349373,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0052,
      "num_tokens": 566735168.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.20343543589115143,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 974
    },
    {
      "clip_ratio/high_max": 0.0014222874833649257,
      "clip_ratio/high_mean": 0.0003877350225138798,
      "clip_ratio/low_mean": 0.000343247693081139,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007309827069548192,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3809.0,
      "completions/mean_length": 635.6395263671875,
      "completions/mean_terminated_length": 568.715576171875,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 9.11198600174978,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0026,
      "num_tokens": 567340581.0,
      "reward": 0.4799107313156128,
      "reward_std": 0.20688427984714508,
      "rewards/verify_math_reward/mean": 0.4799107015132904,
      "rewards/verify_math_reward/std": 0.4998752772808075,
      "step": 975
    },
    {
      "clip_ratio/high_max": 0.0015988921968528302,
      "clip_ratio/high_mean": 0.0004650440851037274,
      "clip_ratio/low_mean": 0.0003426979819778353,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000807742075267015,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3360.0,
      "completions/mean_length": 606.1015625,
      "completions/mean_terminated_length": 538.6063842773438,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 9.121318168562263,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0065,
      "num_tokens": 567906192.0,
      "reward": 0.512276828289032,
      "reward_std": 0.22500188648700714,
      "rewards/verify_math_reward/mean": 0.5122767686843872,
      "rewards/verify_math_reward/std": 0.500128448009491,
      "step": 976
    },
    {
      "clip_ratio/high_max": 0.0014862458629067987,
      "clip_ratio/high_mean": 0.000497794334705759,
      "clip_ratio/low_mean": 0.00037054970061944914,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008683440528329811,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3323.0,
      "completions/mean_length": 677.53125,
      "completions/mean_terminated_length": 607.4487915039062,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 9.130650335374744,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.0028,
      "num_tokens": 568532484.0,
      "reward": 0.5022321939468384,
      "reward_std": 0.2364223450422287,
      "rewards/verify_math_reward/mean": 0.5022321343421936,
      "rewards/verify_math_reward/std": 0.5002742409706116,
      "step": 977
    },
    {
      "clip_ratio/high_max": 0.001443858863240166,
      "clip_ratio/high_mean": 0.0003764739296912012,
      "clip_ratio/low_mean": 0.00032745573957981833,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007039296669972828,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4004.0,
      "completions/mean_length": 644.1439819335938,
      "completions/mean_terminated_length": 565.33447265625,
      "completions/min_length": 58.0,
      "completions/min_terminated_length": 58.0,
      "epoch": 9.139982502187227,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0003,
      "num_tokens": 569116485.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.1867859810590744,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796950817108154,
      "step": 978
    },
    {
      "clip_ratio/high_max": 0.0018931880340460339,
      "clip_ratio/high_mean": 0.0005805788216548535,
      "clip_ratio/low_mean": 0.0003178017252594145,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008983805510069942,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3165.0,
      "completions/mean_length": 609.4699096679688,
      "completions/mean_terminated_length": 566.1344604492188,
      "completions/min_length": 75.0,
      "completions/min_terminated_length": 75.0,
      "epoch": 9.149314668999708,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.0135,
      "num_tokens": 569712306.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.2284930944442749,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 979
    },
    {
      "clip_ratio/high_max": 0.0016218523360294057,
      "clip_ratio/high_mean": 0.0005201477135869936,
      "clip_ratio/low_mean": 0.0003666365460048837,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008867842439030937,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3315.0,
      "completions/mean_length": 687.2779541015625,
      "completions/mean_terminated_length": 581.3682250976562,
      "completions/min_length": 57.0,
      "completions/min_terminated_length": 57.0,
      "epoch": 9.15864683581219,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0009,
      "num_tokens": 570308715.0,
      "reward": 0.527901828289032,
      "reward_std": 0.2483748346567154,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 980
    },
    {
      "clip_ratio/high_max": 0.0017747420506566414,
      "clip_ratio/high_mean": 0.0005133899981046852,
      "clip_ratio/low_mean": 0.00028014496490413876,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007935349694889737,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3589.0,
      "completions/mean_length": 616.3694458007812,
      "completions/mean_terminated_length": 553.1033935546875,
      "completions/min_length": 88.0,
      "completions/min_terminated_length": 88.0,
      "epoch": 9.167979002624673,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0007,
      "num_tokens": 570892870.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.22300449013710022,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 981
    },
    {
      "clip_ratio/high_max": 0.0013452855027935584,
      "clip_ratio/high_mean": 0.00033923454259365826,
      "clip_ratio/low_mean": 0.00032456524900226214,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006637997832967812,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2109.0,
      "completions/mean_length": 576.099365234375,
      "completions/mean_terminated_length": 540.3843994140625,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 9.177311169437154,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": 0.0039,
      "num_tokens": 571451647.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.1696561723947525,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 982
    },
    {
      "clip_ratio/high_max": 0.0014058054948691279,
      "clip_ratio/high_mean": 0.0004449033821174453,
      "clip_ratio/low_mean": 0.0002943426667343374,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007392460504433984,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3615.0,
      "completions/mean_length": 581.9397583007812,
      "completions/mean_terminated_length": 542.2776489257812,
      "completions/min_length": 63.0,
      "completions/min_terminated_length": 63.0,
      "epoch": 9.186643336249636,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0,
      "num_tokens": 572017457.0,
      "reward": 0.6082589626312256,
      "reward_std": 0.21616928279399872,
      "rewards/verify_math_reward/mean": 0.6082589030265808,
      "rewards/verify_math_reward/std": 0.48841196298599243,
      "step": 983
    },
    {
      "clip_ratio/high_max": 0.0014298116630016011,
      "clip_ratio/high_mean": 0.0003933404473173141,
      "clip_ratio/low_mean": 0.0003025793921551667,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006959198444747017,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2820.0,
      "completions/mean_length": 647.0424194335938,
      "completions/mean_terminated_length": 576.3348999023438,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 9.195975503062117,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": 0.0057,
      "num_tokens": 572613231.0,
      "reward": 0.53125,
      "reward_std": 0.20080114901065826,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 984
    },
    {
      "clip_ratio/high_max": 0.0016818567855807487,
      "clip_ratio/high_mean": 0.0005095037383853196,
      "clip_ratio/low_mean": 0.0002962551257041923,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008057588711380959,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3771.0,
      "completions/mean_length": 558.9553833007812,
      "completions/mean_terminated_length": 527.090087890625,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 9.2053076698746,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0062,
      "num_tokens": 573152959.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.1949855387210846,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 985
    },
    {
      "clip_ratio/high_max": 0.0018361453376201098,
      "clip_ratio/high_mean": 0.0005749121432927495,
      "clip_ratio/low_mean": 0.00031705505421086855,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008919671927287709,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4069.0,
      "completions/mean_length": 576.7879638671875,
      "completions/mean_terminated_length": 533.0463256835938,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 9.21463983668708,
      "grad_norm": 0.146484375,
      "learning_rate": 1e-06,
      "loss": 0.0057,
      "num_tokens": 573710289.0,
      "reward": 0.6328125,
      "reward_std": 0.2312796264886856,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 986
    },
    {
      "clip_ratio/high_max": 0.001607736267033033,
      "clip_ratio/high_mean": 0.0004533880940016388,
      "clip_ratio/low_mean": 0.00042847167742365855,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008818597752906498,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3511.0,
      "completions/mean_length": 659.5770263671875,
      "completions/mean_terminated_length": 597.0965576171875,
      "completions/min_length": 63.0,
      "completions/min_terminated_length": 63.0,
      "epoch": 9.223972003499563,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.003,
      "num_tokens": 574335190.0,
      "reward": 0.5078125,
      "reward_std": 0.20651407539844513,
      "rewards/verify_math_reward/mean": 0.5078125,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 987
    },
    {
      "clip_ratio/high_max": 0.0017732976848492399,
      "clip_ratio/high_mean": 0.0005352601366439558,
      "clip_ratio/low_mean": 0.0003872381439578021,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009224982841260498,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3208.0,
      "completions/mean_length": 671.2667846679688,
      "completions/mean_terminated_length": 620.845947265625,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 9.233304170312044,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0016,
      "num_tokens": 574983277.0,
      "reward": 0.4988839626312256,
      "reward_std": 0.23293182253837585,
      "rewards/verify_math_reward/mean": 0.4988839328289032,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 988
    },
    {
      "clip_ratio/high_max": 0.0017576297595951473,
      "clip_ratio/high_mean": 0.0005030810800690233,
      "clip_ratio/low_mean": 0.0003936687865007116,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008967498679339769,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3161.0,
      "completions/mean_length": 573.9631958007812,
      "completions/mean_terminated_length": 538.2265625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 9.242636337124527,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.0188,
      "num_tokens": 575544316.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.23266012966632843,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 989
    },
    {
      "clip_ratio/high_max": 0.0013710622233702452,
      "clip_ratio/high_mean": 0.0003740097836271161,
      "clip_ratio/low_mean": 0.00029554720356372854,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006695569945804891,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3655.0,
      "completions/mean_length": 615.6730346679688,
      "completions/mean_terminated_length": 548.3629150390625,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 9.251968503937007,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0064,
      "num_tokens": 576123711.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.20008648931980133,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 990
    },
    {
      "clip_ratio/high_max": 0.0016154453733179253,
      "clip_ratio/high_mean": 0.00043451850569908856,
      "clip_ratio/low_mean": 0.0003143019927165369,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007488204887522443,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3042.0,
      "completions/mean_length": 565.6875,
      "completions/mean_terminated_length": 541.8876342773438,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 9.26130067074949,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": 0.0046,
      "num_tokens": 576708119.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.16522064805030823,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 991
    },
    {
      "clip_ratio/high_max": 0.0015530745004070923,
      "clip_ratio/high_mean": 0.00041285441466243356,
      "clip_ratio/low_mean": 0.00035634881396617857,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007692032140766969,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3090.0,
      "completions/mean_length": 637.7210083007812,
      "completions/mean_terminated_length": 566.8223266601562,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 9.27063283756197,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": -0.0063,
      "num_tokens": 577292125.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.19692625105381012,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 992
    },
    {
      "clip_ratio/high_max": 0.0015815202550584218,
      "clip_ratio/high_mean": 0.0004305243475073439,
      "clip_ratio/low_mean": 0.0004106321025574289,
      "clip_ratio/low_min": 1.2739502381009515e-05,
      "clip_ratio/region_mean": 0.0008411564531343174,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3370.0,
      "completions/mean_length": 620.7913208007812,
      "completions/mean_terminated_length": 585.5298461914062,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 9.279965004374453,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0067,
      "num_tokens": 577910698.0,
      "reward": 0.53125,
      "reward_std": 0.21781939268112183,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 993
    },
    {
      "clip_ratio/high_max": 0.0019933510266127996,
      "clip_ratio/high_mean": 0.0006385835677065188,
      "clip_ratio/low_mean": 0.00035117202401124814,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009897555983116035,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3586.0,
      "completions/mean_length": 632.802490234375,
      "completions/mean_terminated_length": 581.8153686523438,
      "completions/min_length": 72.0,
      "completions/min_terminated_length": 72.0,
      "epoch": 9.289297171186934,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0006,
      "num_tokens": 578507617.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.24390017986297607,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 994
    },
    {
      "clip_ratio/high_max": 0.0013096993534418289,
      "clip_ratio/high_mean": 0.00037204886712061125,
      "clip_ratio/low_mean": 0.00029453307570292964,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006665819496447511,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3408.0,
      "completions/mean_length": 613.1138916015625,
      "completions/mean_terminated_length": 545.7542724609375,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 9.298629337999417,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": -0.0046,
      "num_tokens": 579079879.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.18967919051647186,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 995
    },
    {
      "clip_ratio/high_max": 0.001640536576815066,
      "clip_ratio/high_mean": 0.0004984663798950351,
      "clip_ratio/low_mean": 0.00029986659455971676,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007983329805938411,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3896.0,
      "completions/mean_length": 614.7377319335938,
      "completions/mean_terminated_length": 555.4653930664062,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 9.307961504811898,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0129,
      "num_tokens": 579661684.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.22255055606365204,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 996
    },
    {
      "clip_ratio/high_max": 0.0014478760458587203,
      "clip_ratio/high_mean": 0.00043692247299986775,
      "clip_ratio/low_mean": 0.000346227021964296,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007831494958736585,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3933.0,
      "completions/mean_length": 578.7723388671875,
      "completions/mean_terminated_length": 531.0271606445312,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 9.31729367162438,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0077,
      "num_tokens": 580218440.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.20595203340053558,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317117214203,
      "step": 997
    },
    {
      "clip_ratio/high_max": 0.001665726963437919,
      "clip_ratio/high_mean": 0.0005046477675705319,
      "clip_ratio/low_mean": 0.00042466105219318706,
      "clip_ratio/low_min": 1.0045001545222476e-05,
      "clip_ratio/region_mean": 0.0009293088205595268,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2393.0,
      "completions/mean_length": 600.796875,
      "completions/mean_terminated_length": 549.338623046875,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 9.326625838436861,
      "grad_norm": 0.15625,
      "learning_rate": 1e-06,
      "loss": 0.011,
      "num_tokens": 580781218.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.2465658336877823,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689781665802,
      "step": 998
    },
    {
      "clip_ratio/high_max": 0.0017876784786494682,
      "clip_ratio/high_mean": 0.0004964058711038888,
      "clip_ratio/low_mean": 0.00036549975561683823,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008619056288807769,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3417.0,
      "completions/mean_length": 556.0301513671875,
      "completions/mean_terminated_length": 512.030517578125,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 9.335958005249344,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0202,
      "num_tokens": 581325741.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.20261241495609283,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 999
    },
    {
      "clip_ratio/high_max": 0.0012760765566781629,
      "clip_ratio/high_mean": 0.00035061654477885895,
      "clip_ratio/low_mean": 0.0002110987237529116,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0005617152737613651,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3900.0,
      "completions/mean_length": 593.6975708007812,
      "completions/mean_terminated_length": 558.1611938476562,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 9.345290172061826,
      "grad_norm": 0.109375,
      "learning_rate": 1e-06,
      "loss": -0.0062,
      "num_tokens": 581915550.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.15991567075252533,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924396276473999,
      "step": 1000
    },
    {
      "clip_ratio/high_max": 0.002071037813948351,
      "clip_ratio/high_mean": 0.0005819952150432073,
      "clip_ratio/low_mean": 0.0004212928470224142,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010032880572907743,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2502.0,
      "completions/mean_length": 590.8973388671875,
      "completions/mean_terminated_length": 547.3311157226562,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 9.354622338874307,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0129,
      "num_tokens": 582493786.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.23364469408988953,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 1001
    },
    {
      "clip_ratio/high_max": 0.0016897258901735768,
      "clip_ratio/high_mean": 0.0005495733062161889,
      "clip_ratio/low_mean": 0.0002537046361794637,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008032779414861579,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3972.0,
      "completions/mean_length": 569.5803833007812,
      "completions/mean_terminated_length": 525.7491455078125,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 9.36395450568679,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": 0.0109,
      "num_tokens": 583041778.0,
      "reward": 0.5625,
      "reward_std": 0.19772180914878845,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 1002
    },
    {
      "clip_ratio/high_max": 0.0016611479131825035,
      "clip_ratio/high_mean": 0.00045596009431392304,
      "clip_ratio/low_mean": 0.0002882320144408368,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007441921043209732,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2643.0,
      "completions/mean_length": 603.8192138671875,
      "completions/mean_terminated_length": 556.4140625,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 9.37328667249927,
      "grad_norm": 0.11572265625,
      "learning_rate": 1e-06,
      "loss": 0.0105,
      "num_tokens": 583621336.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.18088369071483612,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943605065345764,
      "step": 1003
    },
    {
      "clip_ratio/high_max": 0.0015815586339158472,
      "clip_ratio/high_mean": 0.0005218012724981236,
      "clip_ratio/low_mean": 0.000298866166190237,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008206674433495209,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2059.0,
      "completions/mean_length": 621.4888916015625,
      "completions/mean_terminated_length": 550.2574462890625,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 9.382618839311753,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 584195918.0,
      "reward": 0.535714328289032,
      "reward_std": 0.21091002225875854,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 1004
    },
    {
      "clip_ratio/high_max": 0.0017101928788179066,
      "clip_ratio/high_mean": 0.000560220372335607,
      "clip_ratio/low_mean": 0.00034818381993773073,
      "clip_ratio/low_min": 1.051834442478139e-05,
      "clip_ratio/region_mean": 0.0009084041876121773,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3058.0,
      "completions/mean_length": 612.5167846679688,
      "completions/mean_terminated_length": 581.134033203125,
      "completions/min_length": 91.0,
      "completions/min_terminated_length": 91.0,
      "epoch": 9.391951006124234,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0031,
      "num_tokens": 584796997.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.2270306795835495,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 1005
    },
    {
      "clip_ratio/high_max": 0.0014503122120004264,
      "clip_ratio/high_mean": 0.0004433378998101034,
      "clip_ratio/low_mean": 0.00033345918097893446,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007767970964778215,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3953.0,
      "completions/mean_length": 665.9788208007812,
      "completions/mean_terminated_length": 587.6677856445312,
      "completions/min_length": 26.0,
      "completions/min_terminated_length": 26.0,
      "epoch": 9.401283172936717,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0133,
      "num_tokens": 585400402.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.19497595727443695,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5001994967460632,
      "step": 1006
    },
    {
      "clip_ratio/high_max": 0.0016050372869358398,
      "clip_ratio/high_mean": 0.0005562499048892278,
      "clip_ratio/low_mean": 0.00028444551423945086,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008406954211750417,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4016.0,
      "completions/mean_length": 615.7366333007812,
      "completions/mean_terminated_length": 548.427734375,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 9.410615339749198,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": 0.0003,
      "num_tokens": 585970918.0,
      "reward": 0.59375,
      "reward_std": 0.20252613723278046,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 1007
    },
    {
      "clip_ratio/high_max": 0.0015311507650039857,
      "clip_ratio/high_mean": 0.00044015199955538264,
      "clip_ratio/low_mean": 0.0003749093129954417,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008150613184625399,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3824.0,
      "completions/mean_length": 618.2254638671875,
      "completions/mean_terminated_length": 571.015869140625,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 9.41994750656168,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0057,
      "num_tokens": 586564280.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.22582675516605377,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 1008
    },
    {
      "clip_ratio/high_max": 0.0012292135716052144,
      "clip_ratio/high_mean": 0.0003560837087661639,
      "clip_ratio/low_mean": 0.0003563880686670018,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007124717767510447,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2959.0,
      "completions/mean_length": 617.5100708007812,
      "completions/mean_terminated_length": 574.2745971679688,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 9.429279673374161,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.009,
      "num_tokens": 587160345.0,
      "reward": 0.535714328289032,
      "reward_std": 0.19910797476768494,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 1009
    },
    {
      "clip_ratio/high_max": 0.001768231273672427,
      "clip_ratio/high_mean": 0.0004981605184184446,
      "clip_ratio/low_mean": 0.0003863488836941542,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008845094143907772,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2806.0,
      "completions/mean_length": 619.786865234375,
      "completions/mean_terminated_length": 548.5205078125,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 9.438611840186644,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0026,
      "num_tokens": 587739226.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.21112899482250214,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 1010
    },
    {
      "clip_ratio/high_max": 0.0019684721610246925,
      "clip_ratio/high_mean": 0.0006615833231080614,
      "clip_ratio/low_mean": 0.0003499977823366862,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010115811091964133,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3607.0,
      "completions/mean_length": 601.5714721679688,
      "completions/mean_terminated_length": 554.1357421875,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 9.447944006999125,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0131,
      "num_tokens": 588319570.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.2502896189689636,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 1011
    },
    {
      "clip_ratio/high_max": 0.0015889170699665556,
      "clip_ratio/high_mean": 0.0004813561185983417,
      "clip_ratio/low_mean": 0.00032364998958200886,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000805006105110806,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2658.0,
      "completions/mean_length": 583.544677734375,
      "completions/mean_terminated_length": 523.7412109375,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 9.457276173811607,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0018,
      "num_tokens": 588870722.0,
      "reward": 0.590401828289032,
      "reward_std": 0.2043694704771042,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 1012
    },
    {
      "clip_ratio/high_max": 0.0014783477381570265,
      "clip_ratio/high_mean": 0.0003990056422935595,
      "clip_ratio/low_mean": 0.0003467415241402705,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007457471647285274,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3370.0,
      "completions/mean_length": 590.3527221679688,
      "completions/mean_terminated_length": 546.7796630859375,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 9.466608340624088,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0047,
      "num_tokens": 589461462.0,
      "reward": 0.494419664144516,
      "reward_std": 0.19952164590358734,
      "rewards/verify_math_reward/mean": 0.4944196343421936,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 1013
    },
    {
      "clip_ratio/high_max": 0.0018437276394251967,
      "clip_ratio/high_mean": 0.0005355752382456558,
      "clip_ratio/low_mean": 0.00026403180913803226,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000799607054432272,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3397.0,
      "completions/mean_length": 659.4989013671875,
      "completions/mean_terminated_length": 620.7122192382812,
      "completions/min_length": 77.0,
      "completions/min_terminated_length": 77.0,
      "epoch": 9.47594050743657,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": -0.0038,
      "num_tokens": 590094893.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.2111382633447647,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 1014
    },
    {
      "clip_ratio/high_max": 0.001442779456738208,
      "clip_ratio/high_mean": 0.000435021896237231,
      "clip_ratio/low_mean": 0.00024378611402653405,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006788080145270214,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3704.0,
      "completions/mean_length": 697.185302734375,
      "completions/mean_terminated_length": 615.6137084960938,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 9.485272674249051,
      "grad_norm": 0.1123046875,
      "learning_rate": 1e-06,
      "loss": -0.0019,
      "num_tokens": 590723643.0,
      "reward": 0.4441964626312256,
      "reward_std": 0.19358617067337036,
      "rewards/verify_math_reward/mean": 0.4441964328289032,
      "rewards/verify_math_reward/std": 0.49715369939804077,
      "step": 1015
    },
    {
      "clip_ratio/high_max": 0.0017190755297633586,
      "clip_ratio/high_mean": 0.0005332826183348516,
      "clip_ratio/low_mean": 0.00030512571436247526,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008384083257624297,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4025.0,
      "completions/mean_length": 586.8616333007812,
      "completions/mean_terminated_length": 527.1146850585938,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 9.494604841061534,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0062,
      "num_tokens": 591268103.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.21298159658908844,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 1016
    },
    {
      "clip_ratio/high_max": 0.0015440364350070013,
      "clip_ratio/high_mean": 0.0004342544613109567,
      "clip_ratio/low_mean": 0.0004449193831987941,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008791738518993952,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2152.0,
      "completions/mean_length": 621.7154541015625,
      "completions/mean_terminated_length": 554.5221557617188,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 9.503937007874015,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0058,
      "num_tokens": 591837840.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.2205638438463211,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 1017
    },
    {
      "clip_ratio/high_max": 0.0016775978756413679,
      "clip_ratio/high_mean": 0.0005225780655564449,
      "clip_ratio/low_mean": 0.0004171479017713864,
      "clip_ratio/low_min": 1.0109996765095275e-05,
      "clip_ratio/region_mean": 0.0009397259718753048,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3891.0,
      "completions/mean_length": 612.9486694335938,
      "completions/mean_terminated_length": 569.656494140625,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 9.513269174686497,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 592435378.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.2252180278301239,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 1018
    },
    {
      "clip_ratio/high_max": 0.0016598425918346038,
      "clip_ratio/high_mean": 0.0005649558779623476,
      "clip_ratio/low_mean": 0.0004223565244956262,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009873123999568634,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4054.0,
      "completions/mean_length": 599.8627319335938,
      "completions/mean_terminated_length": 564.388916015625,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 9.52260134149898,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0113,
      "num_tokens": 593017191.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.24960379302501678,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 1019
    },
    {
      "clip_ratio/high_max": 0.0014643544318460044,
      "clip_ratio/high_mean": 0.0003963733986438456,
      "clip_ratio/low_mean": 0.0004394810962367046,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00083585449920065,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2703.0,
      "completions/mean_length": 644.1920166015625,
      "completions/mean_terminated_length": 573.4259643554688,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 9.531933508311461,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0033,
      "num_tokens": 593605507.0,
      "reward": 0.5033482313156128,
      "reward_std": 0.21613861620426178,
      "rewards/verify_math_reward/mean": 0.5033482313156128,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 1020
    },
    {
      "clip_ratio/high_max": 0.0015964167314450606,
      "clip_ratio/high_mean": 0.00046870355231476424,
      "clip_ratio/low_mean": 0.0003091682157219111,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007778717708788463,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3500.0,
      "completions/mean_length": 618.8861694335938,
      "completions/mean_terminated_length": 551.63818359375,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 9.541265675123944,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0046,
      "num_tokens": 594178189.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.19561536610126495,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 1021
    },
    {
      "clip_ratio/high_max": 0.001627592717341031,
      "clip_ratio/high_mean": 0.00047343376854769303,
      "clip_ratio/low_mean": 0.00032549734771691874,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007989311216078931,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2881.0,
      "completions/mean_length": 618.0357666015625,
      "completions/mean_terminated_length": 558.819580078125,
      "completions/min_length": 44.0,
      "completions/min_terminated_length": 44.0,
      "epoch": 9.550597841936424,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0076,
      "num_tokens": 594758661.0,
      "reward": 0.582589328289032,
      "reward_std": 0.20569244027137756,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.493407279253006,
      "step": 1022
    },
    {
      "clip_ratio/high_max": 0.001609678109161905,
      "clip_ratio/high_mean": 0.00046538331412193656,
      "clip_ratio/low_mean": 0.000392561342209774,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000857944663039234,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3430.0,
      "completions/mean_length": 681.044677734375,
      "completions/mean_terminated_length": 611.0341796875,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 9.559930008748907,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": -0.0044,
      "num_tokens": 595396293.0,
      "reward": 0.470982164144516,
      "reward_std": 0.23150008916854858,
      "rewards/verify_math_reward/mean": 0.4709821343421936,
      "rewards/verify_math_reward/std": 0.49943602085113525,
      "step": 1023
    },
    {
      "clip_ratio/high_max": 0.0017634148316574283,
      "clip_ratio/high_mean": 0.0005043214844135946,
      "clip_ratio/low_mean": 0.0004319453153129871,
      "clip_ratio/low_min": 2.605532336019678e-05,
      "clip_ratio/region_mean": 0.0009362667951791082,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3143.0,
      "completions/mean_length": 593.3660888671875,
      "completions/mean_terminated_length": 549.8305053710938,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 9.569262175561388,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0027,
      "num_tokens": 595982261.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.22330942749977112,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 1024
    },
    {
      "clip_ratio/high_max": 0.0016593620530329645,
      "clip_ratio/high_mean": 0.0005050667223258642,
      "clip_ratio/low_mean": 0.00026910394012702454,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007741706713204621,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3742.0,
      "completions/mean_length": 656.372802734375,
      "completions/mean_terminated_length": 585.8565063476562,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 9.57859434237387,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0099,
      "num_tokens": 596579747.0,
      "reward": 0.582589328289032,
      "reward_std": 0.2100098878145218,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.493407279253006,
      "step": 1025
    },
    {
      "clip_ratio/high_max": 0.001635274834370648,
      "clip_ratio/high_mean": 0.00047569862294949417,
      "clip_ratio/low_mean": 0.00030445646814314387,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007801551028023823,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2578.0,
      "completions/mean_length": 583.4810791015625,
      "completions/mean_terminated_length": 515.54833984375,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 9.587926509186351,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0044,
      "num_tokens": 597130194.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.1856241375207901,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 1026
    },
    {
      "clip_ratio/high_max": 0.001661720155425428,
      "clip_ratio/high_mean": 0.0004896392042610387,
      "clip_ratio/low_mean": 0.0002725053276435574,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007621445315635356,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2047.0,
      "completions/mean_length": 605.661865234375,
      "completions/mean_terminated_length": 542.2011108398438,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 9.597258675998834,
      "grad_norm": 0.1171875,
      "learning_rate": 1e-06,
      "loss": -0.0145,
      "num_tokens": 597693315.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.17870448529720306,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 1027
    },
    {
      "clip_ratio/high_max": 0.0012194090950288228,
      "clip_ratio/high_mean": 0.0004011477055883006,
      "clip_ratio/low_mean": 0.00036200342162828747,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007631511352883535,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2730.0,
      "completions/mean_length": 650.7042846679688,
      "completions/mean_terminated_length": 611.8182983398438,
      "completions/min_length": 56.0,
      "completions/min_terminated_length": 56.0,
      "epoch": 9.606590842811315,
      "grad_norm": 0.1162109375,
      "learning_rate": 1e-06,
      "loss": 0.0009,
      "num_tokens": 598329818.0,
      "reward": 0.5055803656578064,
      "reward_std": 0.17930999398231506,
      "rewards/verify_math_reward/mean": 0.5055803656578064,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 1028
    },
    {
      "clip_ratio/high_max": 0.0018090404009853955,
      "clip_ratio/high_mean": 0.0005827510431117844,
      "clip_ratio/low_mean": 0.00031681506106906454,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008995661037261016,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3716.0,
      "completions/mean_length": 585.2578125,
      "completions/mean_terminated_length": 529.53173828125,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 9.615923009623797,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0047,
      "num_tokens": 598891633.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.21447792649269104,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715369939804077,
      "step": 1029
    },
    {
      "clip_ratio/high_max": 0.0016982593861030182,
      "clip_ratio/high_mean": 0.0005398427610998624,
      "clip_ratio/low_mean": 0.0002807480625506287,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000820590821604128,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3498.0,
      "completions/mean_length": 582.9006958007812,
      "completions/mean_terminated_length": 523.0863037109375,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 9.625255176436278,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0013,
      "num_tokens": 599437824.0,
      "reward": 0.6238839626312256,
      "reward_std": 0.194227397441864,
      "rewards/verify_math_reward/mean": 0.6238839030265808,
      "rewards/verify_math_reward/std": 0.48468026518821716,
      "step": 1030
    },
    {
      "clip_ratio/high_max": 0.0013905311316193547,
      "clip_ratio/high_mean": 0.00043191946792831004,
      "clip_ratio/low_mean": 0.000325700002576923,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007576194821012905,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2730.0,
      "completions/mean_length": 630.625,
      "completions/mean_terminated_length": 563.6040649414062,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 9.63458734324876,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0026,
      "num_tokens": 600022896.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.18599504232406616,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 1031
    },
    {
      "clip_ratio/high_max": 0.001653968316531973,
      "clip_ratio/high_mean": 0.0005212303149164654,
      "clip_ratio/low_mean": 0.0003065186374442419,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008277489523607073,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3873.0,
      "completions/mean_length": 607.5636596679688,
      "completions/mean_terminated_length": 576.1362915039062,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 9.643919510061242,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0113,
      "num_tokens": 600628745.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.21098490059375763,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751850962638855,
      "step": 1032
    },
    {
      "clip_ratio/high_max": 0.0017462572832300793,
      "clip_ratio/high_mean": 0.0005433238966361387,
      "clip_ratio/low_mean": 0.0003435649302900856,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008868888226061244,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2331.0,
      "completions/mean_length": 576.5881958007812,
      "completions/mean_terminated_length": 540.8782348632812,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 9.653251676873724,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0004,
      "num_tokens": 601192448.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.19945567846298218,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 1033
    },
    {
      "clip_ratio/high_max": 0.002208570349466754,
      "clip_ratio/high_mean": 0.0006456002129198168,
      "clip_ratio/low_mean": 0.00037567671222404897,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001021276943902194,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2319.0,
      "completions/mean_length": 627.2824096679688,
      "completions/mean_terminated_length": 576.2140502929688,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 9.662583843686207,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": -0.0017,
      "num_tokens": 601790869.0,
      "reward": 0.578125,
      "reward_std": 0.237140953540802,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 1034
    },
    {
      "clip_ratio/high_max": 0.0013230712147560553,
      "clip_ratio/high_mean": 0.0003571682145775412,
      "clip_ratio/low_mean": 0.0003271263526585244,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006842945576863713,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3915.0,
      "completions/mean_length": 555.6529541015625,
      "completions/mean_terminated_length": 535.78564453125,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 9.671916010498688,
      "grad_norm": 0.11572265625,
      "learning_rate": 1e-06,
      "loss": 0.0106,
      "num_tokens": 602353366.0,
      "reward": 0.582589328289032,
      "reward_std": 0.18392983078956604,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.493407279253006,
      "step": 1035
    },
    {
      "clip_ratio/high_max": 0.0016704393565305509,
      "clip_ratio/high_mean": 0.0005685832229573862,
      "clip_ratio/low_mean": 0.00031748412220622413,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008860673483468418,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2530.0,
      "completions/mean_length": 581.5011596679688,
      "completions/mean_terminated_length": 545.8410034179688,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 9.68124817731117,
      "grad_norm": 0.1435546875,
      "learning_rate": 1e-06,
      "loss": 0.0152,
      "num_tokens": 602926519.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.22740861773490906,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321895837783813,
      "step": 1036
    },
    {
      "clip_ratio/high_max": 0.002119047327141743,
      "clip_ratio/high_mean": 0.0006555687136824417,
      "clip_ratio/low_mean": 0.0003680266057699555,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010235953250230523,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3265.0,
      "completions/mean_length": 632.6920166015625,
      "completions/mean_terminated_length": 569.7227172851562,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 9.690580344123651,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.0045,
      "num_tokens": 603507147.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.2413061559200287,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 1037
    },
    {
      "clip_ratio/high_max": 0.0015943269299896201,
      "clip_ratio/high_mean": 0.00052203596135314,
      "clip_ratio/low_mean": 0.000388023648156377,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009100595943891676,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1912.0,
      "completions/mean_length": 632.4609375,
      "completions/mean_terminated_length": 561.4544677734375,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 9.699912510936134,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0005,
      "num_tokens": 604083768.0,
      "reward": 0.543526828289032,
      "reward_std": 0.23830027878284454,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 1038
    },
    {
      "clip_ratio/high_max": 0.0015890000067884102,
      "clip_ratio/high_mean": 0.0004911664696010121,
      "clip_ratio/low_mean": 0.0002455752407968248,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007367417165369261,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3680.0,
      "completions/mean_length": 605.0558471679688,
      "completions/mean_terminated_length": 545.61865234375,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 9.709244677748615,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0083,
      "num_tokens": 604653770.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.19681817293167114,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 1039
    },
    {
      "clip_ratio/high_max": 0.0017141320331575116,
      "clip_ratio/high_mean": 0.0005748283646198615,
      "clip_ratio/low_mean": 0.0002959078447020147,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008707362076165737,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2506.0,
      "completions/mean_length": 620.1663208007812,
      "completions/mean_terminated_length": 560.9863891601562,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 9.718576844561097,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0123,
      "num_tokens": 605233159.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.20621420443058014,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763264656067,
      "step": 1040
    },
    {
      "clip_ratio/high_max": 0.0018559827494755154,
      "clip_ratio/high_mean": 0.0005467600085466984,
      "clip_ratio/low_mean": 0.00037965662920669274,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00092641663923132,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3292.0,
      "completions/mean_length": 539.8772583007812,
      "completions/mean_terminated_length": 495.6768493652344,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 9.727909011373578,
      "grad_norm": 0.1455078125,
      "learning_rate": 1e-06,
      "loss": 0.0078,
      "num_tokens": 605762393.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.22620722651481628,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973995089530945,
      "step": 1041
    },
    {
      "clip_ratio/high_max": 0.0014845919586150558,
      "clip_ratio/high_mean": 0.0005118056769788382,
      "clip_ratio/low_mean": 0.0003898650722931052,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009016707490445697,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3428.0,
      "completions/mean_length": 602.75,
      "completions/mean_terminated_length": 539.236328125,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 9.73724117818606,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0048,
      "num_tokens": 606319305.0,
      "reward": 0.613839328289032,
      "reward_std": 0.22969591617584229,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 1042
    },
    {
      "clip_ratio/high_max": 0.0014428102840611245,
      "clip_ratio/high_mean": 0.0004627575365248049,
      "clip_ratio/low_mean": 0.00044080607949581463,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009035636321641505,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3593.0,
      "completions/mean_length": 592.021240234375,
      "completions/mean_terminated_length": 532.3621215820312,
      "completions/min_length": 56.0,
      "completions/min_terminated_length": 56.0,
      "epoch": 9.746573344998541,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0086,
      "num_tokens": 606875244.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.20434018969535828,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 1043
    },
    {
      "clip_ratio/high_max": 0.0017396485473000212,
      "clip_ratio/high_mean": 0.0005491690124017623,
      "clip_ratio/low_mean": 0.00039158352706181176,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009407525503775105,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3378.0,
      "completions/mean_length": 671.0748291015625,
      "completions/mean_terminated_length": 572.7703857421875,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 9.755905511811024,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0025,
      "num_tokens": 607459855.0,
      "reward": 0.566964328289032,
      "reward_std": 0.25156474113464355,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 1044
    },
    {
      "clip_ratio/high_max": 0.0016653785951348254,
      "clip_ratio/high_mean": 0.0005270020892567118,
      "clip_ratio/low_mean": 0.0002509757468942553,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007779778234180412,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3713.0,
      "completions/mean_length": 579.2567138671875,
      "completions/mean_terminated_length": 531.5181274414062,
      "completions/min_length": 13.0,
      "completions/min_terminated_length": 13.0,
      "epoch": 9.765237678623505,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": 0.0088,
      "num_tokens": 608004685.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.18074241280555725,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 1045
    },
    {
      "clip_ratio/high_max": 0.0014723056883667596,
      "clip_ratio/high_mean": 0.00041872708015944227,
      "clip_ratio/low_mean": 0.00039844727859872364,
      "clip_ratio/low_min": 1.3481449968821835e-05,
      "clip_ratio/region_mean": 0.0008171743629645789,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3227.0,
      "completions/mean_length": 684.450927734375,
      "completions/mean_terminated_length": 614.51025390625,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 9.774569845435988,
      "grad_norm": 0.115234375,
      "learning_rate": 1e-06,
      "loss": 0.0116,
      "num_tokens": 608638521.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.20568355917930603,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.4999600946903229,
      "step": 1046
    },
    {
      "clip_ratio/high_max": 0.001494511433520529,
      "clip_ratio/high_mean": 0.00046225893277096475,
      "clip_ratio/low_mean": 0.0003379497046580582,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000800208629698318,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3895.0,
      "completions/mean_length": 665.216552734375,
      "completions/mean_terminated_length": 594.881591796875,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 9.783902012248468,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0093,
      "num_tokens": 609258235.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.20234207808971405,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756613254547,
      "step": 1047
    },
    {
      "clip_ratio/high_max": 0.0017511433034087531,
      "clip_ratio/high_mean": 0.00048649705445313884,
      "clip_ratio/low_mean": 0.0002581315509360138,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000744628598113195,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4000.0,
      "completions/mean_length": 602.1629638671875,
      "completions/mean_terminated_length": 558.7367553710938,
      "completions/min_length": 98.0,
      "completions/min_terminated_length": 98.0,
      "epoch": 9.793234179060951,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.002,
      "num_tokens": 609846141.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.19125469028949738,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606892466545105,
      "step": 1048
    },
    {
      "clip_ratio/high_max": 0.0016136951999214943,
      "clip_ratio/high_mean": 0.0004358586422767985,
      "clip_ratio/low_mean": 0.0003786776679817194,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008145363053699839,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2354.0,
      "completions/mean_length": 631.857177734375,
      "completions/mean_terminated_length": 572.8762817382812,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 9.802566345873432,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.007,
      "num_tokens": 610438685.0,
      "reward": 0.4888392984867096,
      "reward_std": 0.203317791223526,
      "rewards/verify_math_reward/mean": 0.4888392984867096,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 1049
    },
    {
      "clip_ratio/high_max": 0.0016718643801141297,
      "clip_ratio/high_mean": 0.0005195867747715965,
      "clip_ratio/low_mean": 0.0003522919473653019,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008718787289581087,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2635.0,
      "completions/mean_length": 589.9342041015625,
      "completions/mean_terminated_length": 542.3405151367188,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 9.811898512685914,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0047,
      "num_tokens": 611005954.0,
      "reward": 0.625,
      "reward_std": 0.21571576595306396,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1050
    },
    {
      "clip_ratio/high_max": 0.0016855701924214372,
      "clip_ratio/high_mean": 0.0005488282470196282,
      "clip_ratio/low_mean": 0.0003581058080044386,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009069340544556326,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3349.0,
      "completions/mean_length": 633.0178833007812,
      "completions/mean_terminated_length": 557.9931640625,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 9.821230679498395,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0184,
      "num_tokens": 611575882.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.2335277795791626,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 1051
    },
    {
      "clip_ratio/high_max": 0.0014498521923087537,
      "clip_ratio/high_mean": 0.00042809664171272743,
      "clip_ratio/low_mean": 0.00034096496324309555,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007690615902902209,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3234.0,
      "completions/mean_length": 664.0245971679688,
      "completions/mean_terminated_length": 609.5487670898438,
      "completions/min_length": 98.0,
      "completions/min_terminated_length": 98.0,
      "epoch": 9.830562846310878,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0032,
      "num_tokens": 612207752.0,
      "reward": 0.5011160969734192,
      "reward_std": 0.21752727031707764,
      "rewards/verify_math_reward/mean": 0.5011160969734192,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 1052
    },
    {
      "clip_ratio/high_max": 0.0016368181459256448,
      "clip_ratio/high_mean": 0.0004629036884580273,
      "clip_ratio/low_mean": 0.00030241855756685254,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007653222401131643,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2825.0,
      "completions/mean_length": 606.2042846679688,
      "completions/mean_terminated_length": 554.8255615234375,
      "completions/min_length": 79.0,
      "completions/min_terminated_length": 79.0,
      "epoch": 9.83989501312336,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0012,
      "num_tokens": 612780503.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.20493286848068237,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652786254883,
      "step": 1053
    },
    {
      "clip_ratio/high_max": 0.0015261678454407956,
      "clip_ratio/high_mean": 0.00044068275667541457,
      "clip_ratio/low_mean": 0.00036278312018112047,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008034658958422369,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3330.0,
      "completions/mean_length": 647.3783569335938,
      "completions/mean_terminated_length": 552.462158203125,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 9.849227179935841,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0112,
      "num_tokens": 613352306.0,
      "reward": 0.4921875298023224,
      "reward_std": 0.21673452854156494,
      "rewards/verify_math_reward/mean": 0.4921875,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 1054
    },
    {
      "clip_ratio/high_max": 0.0015844178733459557,
      "clip_ratio/high_mean": 0.0004496640008255781,
      "clip_ratio/low_mean": 0.0002630391011280153,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007127031035452092,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3781.0,
      "completions/mean_length": 610.0859375,
      "completions/mean_terminated_length": 550.7344360351562,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 9.858559346748324,
      "grad_norm": 0.11474609375,
      "learning_rate": 1e-06,
      "loss": 0.0075,
      "num_tokens": 613939655.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.18193750083446503,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973994493484497,
      "step": 1055
    },
    {
      "clip_ratio/high_max": 0.0013832466383973951,
      "clip_ratio/high_mean": 0.0003877047243463494,
      "clip_ratio/low_mean": 0.0002934871680508877,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006811918874518597,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3738.0,
      "completions/mean_length": 685.935302734375,
      "completions/mean_terminated_length": 588.057373046875,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 9.867891513560805,
      "grad_norm": 0.11474609375,
      "learning_rate": 1e-06,
      "loss": 0.0147,
      "num_tokens": 614546061.0,
      "reward": 0.5145089626312256,
      "reward_std": 0.1807435303926468,
      "rewards/verify_math_reward/mean": 0.5145089030265808,
      "rewards/verify_math_reward/std": 0.5000685453414917,
      "step": 1056
    },
    {
      "clip_ratio/high_max": 0.0016222526155615924,
      "clip_ratio/high_mean": 0.0005341903965927486,
      "clip_ratio/low_mean": 0.0002644018669570869,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007985922597981698,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4052.0,
      "completions/mean_length": 630.232177734375,
      "completions/mean_terminated_length": 583.185546875,
      "completions/min_length": 70.0,
      "completions/min_terminated_length": 70.0,
      "epoch": 9.877223680373287,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0067,
      "num_tokens": 615148701.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.2181655615568161,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 1057
    },
    {
      "clip_ratio/high_max": 0.001589798277564114,
      "clip_ratio/high_mean": 0.0005022087107136031,
      "clip_ratio/low_mean": 0.00036527433803712483,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008674830378367915,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3523.0,
      "completions/mean_length": 629.8873291015625,
      "completions/mean_terminated_length": 570.8729248046875,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 9.886555847185768,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0004,
      "num_tokens": 615745488.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.22608567774295807,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 1058
    },
    {
      "clip_ratio/high_max": 0.0015174525497059221,
      "clip_ratio/high_mean": 0.000393879813032072,
      "clip_ratio/low_mean": 0.00024093854619877675,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006348183455884282,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2036.0,
      "completions/mean_length": 625.7902221679688,
      "completions/mean_terminated_length": 546.5616455078125,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 9.89588801399825,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.0065,
      "num_tokens": 616310788.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.17911633849143982,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.4999600946903229,
      "step": 1059
    },
    {
      "clip_ratio/high_max": 0.0015828846571821487,
      "clip_ratio/high_mean": 0.0004305865841160994,
      "clip_ratio/low_mean": 0.00028942746791926766,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007200140516943065,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2633.0,
      "completions/mean_length": 570.78125,
      "completions/mean_terminated_length": 522.9276123046875,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 9.905220180810732,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": -0.004,
      "num_tokens": 616859240.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.17149020731449127,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 1060
    },
    {
      "clip_ratio/high_max": 0.0017359167850372614,
      "clip_ratio/high_mean": 0.0005204638389386673,
      "clip_ratio/low_mean": 0.00037651833508789423,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008969821874416084,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2550.0,
      "completions/mean_length": 503.8035888671875,
      "completions/mean_terminated_length": 487.6950988769531,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 9.914552347623214,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0121,
      "num_tokens": 617374888.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.20891515910625458,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.47942501306533813,
      "step": 1061
    },
    {
      "clip_ratio/high_max": 0.001725457988868584,
      "clip_ratio/high_mean": 0.000477585521821311,
      "clip_ratio/low_mean": 0.00038720311158613185,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008647886463677423,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3222.0,
      "completions/mean_length": 630.3125,
      "completions/mean_terminated_length": 567.2999877929688,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 9.923884514435695,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": -0.0018,
      "num_tokens": 617962968.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.19922491908073425,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 1062
    },
    {
      "clip_ratio/high_max": 0.0019836470783047844,
      "clip_ratio/high_mean": 0.0005347727078515163,
      "clip_ratio/low_mean": 0.0002900128150713499,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008247855193985743,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3111.0,
      "completions/mean_length": 564.9408569335938,
      "completions/mean_terminated_length": 508.8923034667969,
      "completions/min_length": 70.0,
      "completions/min_terminated_length": 70.0,
      "epoch": 9.933216681248178,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0079,
      "num_tokens": 618507411.0,
      "reward": 0.543526828289032,
      "reward_std": 0.2126345932483673,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 1063
    },
    {
      "clip_ratio/high_max": 0.0014982956236053724,
      "clip_ratio/high_mean": 0.0005473875917232363,
      "clip_ratio/low_mean": 0.00041467652863502735,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009620641203582636,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2453.0,
      "completions/mean_length": 631.1049194335938,
      "completions/mean_terminated_length": 607.74609375,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "epoch": 9.942548848060659,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0021,
      "num_tokens": 619128145.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.2296210527420044,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 1064
    },
    {
      "clip_ratio/high_max": 0.0017813220674725017,
      "clip_ratio/high_mean": 0.0005317763999528324,
      "clip_ratio/low_mean": 0.0003509698141215267,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008827462388580898,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2415.0,
      "completions/mean_length": 655.536865234375,
      "completions/mean_terminated_length": 581.0,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 9.951881014873141,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": -0.0125,
      "num_tokens": 619729586.0,
      "reward": 0.5703125,
      "reward_std": 0.2108672559261322,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 1065
    },
    {
      "clip_ratio/high_max": 0.001583902850143204,
      "clip_ratio/high_mean": 0.0004801422760465357,
      "clip_ratio/low_mean": 0.0002936088548040061,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007737511232335237,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3959.0,
      "completions/mean_length": 589.4564819335938,
      "completions/mean_terminated_length": 549.8792724609375,
      "completions/min_length": 85.0,
      "completions/min_terminated_length": 85.0,
      "epoch": 9.961213181685622,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0126,
      "num_tokens": 620307955.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.2038070261478424,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 1066
    },
    {
      "clip_ratio/high_max": 0.001739679495585733,
      "clip_ratio/high_mean": 0.0004959868065270712,
      "clip_ratio/low_mean": 0.0004510337030296796,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009470204950048355,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4001.0,
      "completions/mean_length": 601.6998291015625,
      "completions/mean_terminated_length": 554.265869140625,
      "completions/min_length": 87.0,
      "completions/min_terminated_length": 87.0,
      "epoch": 9.970545348498105,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0019,
      "num_tokens": 620890710.0,
      "reward": 0.5301339626312256,
      "reward_std": 0.24221837520599365,
      "rewards/verify_math_reward/mean": 0.5301339030265808,
      "rewards/verify_math_reward/std": 0.49936985969543457,
      "step": 1067
    },
    {
      "clip_ratio/high_max": 0.0015477416945941513,
      "clip_ratio/high_mean": 0.0004594975466716278,
      "clip_ratio/low_mean": 0.00027450891491298535,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007340064667005208,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3739.0,
      "completions/mean_length": 631.6027221679688,
      "completions/mean_terminated_length": 556.5473022460938,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 9.979877515310585,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": 0.0068,
      "num_tokens": 621457746.0,
      "reward": 0.6171875,
      "reward_std": 0.19564956426620483,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 1068
    },
    {
      "clip_ratio/high_max": 0.0014885582504575723,
      "clip_ratio/high_mean": 0.0004587147298025229,
      "clip_ratio/low_mean": 0.00030915980028112244,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007678745264456666,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3943.0,
      "completions/mean_length": 626.5413208007812,
      "completions/mean_terminated_length": 559.44140625,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 9.989209682123068,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.0041,
      "num_tokens": 622053583.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.19389109313488007,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 1069
    },
    {
      "clip_ratio/high_max": 0.001736123401315126,
      "clip_ratio/high_mean": 0.0005841668562425184,
      "clip_ratio/low_mean": 0.00036166243921798014,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009458292834096937,
      "completions/clipped_ratio": 0.017045454545454586,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2381.0,
      "completions/mean_length": 641.0227661132812,
      "completions/mean_terminated_length": 581.1098022460938,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 9.998541848935549,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": -0.0079,
      "num_tokens": 622652831.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.23412367701530457,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 1070
    },
    {
      "clip_ratio/high_max": 0.001919582125992747,
      "clip_ratio/high_mean": 0.0005472912157529208,
      "clip_ratio/low_mean": 0.0003837442050098616,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009310354244007613,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3619.0,
      "completions/mean_length": 613.8114013671875,
      "completions/mean_terminated_length": 554.5233154296875,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 10.009332166812483,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0059,
      "num_tokens": 623233870.0,
      "reward": 0.5,
      "reward_std": 0.23165123164653778,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5002792477607727,
      "step": 1071
    },
    {
      "clip_ratio/high_max": 0.001850129976446624,
      "clip_ratio/high_mean": 0.0005400942072810722,
      "clip_ratio/low_mean": 0.0002552113480760454,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007953055492180283,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3605.0,
      "completions/mean_length": 619.8326416015625,
      "completions/mean_terminated_length": 580.5982055664062,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 10.018664333624963,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0055,
      "num_tokens": 623841688.0,
      "reward": 0.520089328289032,
      "reward_std": 0.22718150913715363,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 1072
    },
    {
      "clip_ratio/high_max": 0.001637823027522245,
      "clip_ratio/high_mean": 0.0004614952532620009,
      "clip_ratio/low_mean": 0.00027986235090793343,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007413576065573579,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2409.0,
      "completions/mean_length": 624.794677734375,
      "completions/mean_terminated_length": 557.6609497070312,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 10.027996500437446,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0012,
      "num_tokens": 624422800.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.20564965903759003,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 1073
    },
    {
      "clip_ratio/high_max": 0.0015059621509863064,
      "clip_ratio/high_mean": 0.0004688872477345285,
      "clip_ratio/low_mean": 0.0004503274567468907,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009192147099383874,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4074.0,
      "completions/mean_length": 656.4553833007812,
      "completions/mean_terminated_length": 561.7889404296875,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 10.037328667249927,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0077,
      "num_tokens": 624998632.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.21286143362522125,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608329772949,
      "step": 1074
    },
    {
      "clip_ratio/high_max": 0.0014891096298015327,
      "clip_ratio/high_mean": 0.0004943469086811092,
      "clip_ratio/low_mean": 0.0003300498495946158,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008243967622547643,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2638.0,
      "completions/mean_length": 607.8046875,
      "completions/mean_terminated_length": 572.4114990234375,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "epoch": 10.04666083406241,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0037,
      "num_tokens": 625601793.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.21951259672641754,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4982811510562897,
      "step": 1075
    },
    {
      "clip_ratio/high_max": 0.0015768517932883697,
      "clip_ratio/high_mean": 0.0004668293256600009,
      "clip_ratio/low_mean": 0.00029906721465522423,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007658965423615882,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3007.0,
      "completions/mean_length": 581.953125,
      "completions/mean_terminated_length": 550.2950439453125,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 10.05599300087489,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0108,
      "num_tokens": 626167031.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.20936980843544006,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 1076
    },
    {
      "clip_ratio/high_max": 0.0015254894242389128,
      "clip_ratio/high_mean": 0.00043216289623160264,
      "clip_ratio/low_mean": 0.00032624031064187875,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007584032155136811,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2434.0,
      "completions/mean_length": 606.6283569335938,
      "completions/mean_terminated_length": 563.2576293945312,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 10.065325167687373,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": -0.0124,
      "num_tokens": 626757858.0,
      "reward": 0.590401828289032,
      "reward_std": 0.17341090738773346,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 1077
    },
    {
      "clip_ratio/high_max": 0.001506708684246405,
      "clip_ratio/high_mean": 0.0004400943637392629,
      "clip_ratio/low_mean": 0.00034780395139932807,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007878983233240433,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3447.0,
      "completions/mean_length": 664.6272583007812,
      "completions/mean_terminated_length": 586.2853393554688,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 10.074657334499854,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0032,
      "num_tokens": 627358924.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.20534543693065643,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 1078
    },
    {
      "clip_ratio/high_max": 0.0015208335544230067,
      "clip_ratio/high_mean": 0.0004815797437913716,
      "clip_ratio/low_mean": 0.00040735346692599705,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008889332175385789,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4004.0,
      "completions/mean_length": 630.859375,
      "completions/mean_terminated_length": 579.8436889648438,
      "completions/min_length": 49.0,
      "completions/min_terminated_length": 49.0,
      "epoch": 10.083989501312336,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0121,
      "num_tokens": 627973710.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.2173001766204834,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756911277771,
      "step": 1079
    },
    {
      "clip_ratio/high_max": 0.0017864221226773225,
      "clip_ratio/high_mean": 0.0006015160261085839,
      "clip_ratio/low_mean": 0.0003232312412819738,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000924747267163184,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2419.0,
      "completions/mean_length": 633.765625,
      "completions/mean_terminated_length": 578.8095092773438,
      "completions/min_length": 84.0,
      "completions/min_terminated_length": 84.0,
      "epoch": 10.093321668124817,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0112,
      "num_tokens": 628586092.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.2251092493534088,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 1080
    },
    {
      "clip_ratio/high_max": 0.0017837556415543077,
      "clip_ratio/high_mean": 0.000528502018596555,
      "clip_ratio/low_mean": 0.00035645686398311227,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008849588662087626,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3144.0,
      "completions/mean_length": 591.6239013671875,
      "completions/mean_terminated_length": 544.0531616210938,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 10.1026538349373,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0025,
      "num_tokens": 629159699.0,
      "reward": 0.5379464626312256,
      "reward_std": 0.21564048528671265,
      "rewards/verify_math_reward/mean": 0.5379464030265808,
      "rewards/verify_math_reward/std": 0.4988364577293396,
      "step": 1081
    },
    {
      "clip_ratio/high_max": 0.0015256862934620585,
      "clip_ratio/high_mean": 0.0004688509143306874,
      "clip_ratio/low_mean": 0.0002828350320669415,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007516859441238921,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2133.0,
      "completions/mean_length": 597.6574096679688,
      "completions/mean_terminated_length": 558.1727294921875,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 10.11198600174978,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0086,
      "num_tokens": 629749512.0,
      "reward": 0.543526828289032,
      "reward_std": 0.20354489982128143,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 1082
    },
    {
      "clip_ratio/high_max": 0.0015897395605861675,
      "clip_ratio/high_mean": 0.00046778800992797187,
      "clip_ratio/low_mean": 0.00037000702150180587,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008377950489375507,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3803.0,
      "completions/mean_length": 612.1116333007812,
      "completions/mean_terminated_length": 564.8190307617188,
      "completions/min_length": 78.0,
      "completions/min_terminated_length": 78.0,
      "epoch": 10.121318168562263,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0101,
      "num_tokens": 630348828.0,
      "reward": 0.5167410969734192,
      "reward_std": 0.2212817370891571,
      "rewards/verify_math_reward/mean": 0.5167410969734192,
      "rewards/verify_math_reward/std": 0.4999987483024597,
      "step": 1083
    },
    {
      "clip_ratio/high_max": 0.0015059544812174863,
      "clip_ratio/high_mean": 0.00044863606876788253,
      "clip_ratio/low_mean": 0.00035496303701165743,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008035990940697957,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2532.0,
      "completions/mean_length": 627.505615234375,
      "completions/mean_terminated_length": 584.3943481445312,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 10.130650335374744,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0205,
      "num_tokens": 630960457.0,
      "reward": 0.4743303656578064,
      "reward_std": 0.20865483582019806,
      "rewards/verify_math_reward/mean": 0.4743303656578064,
      "rewards/verify_math_reward/std": 0.4996195137500763,
      "step": 1084
    },
    {
      "clip_ratio/high_max": 0.001831831161325681,
      "clip_ratio/high_mean": 0.0004756126108986791,
      "clip_ratio/low_mean": 0.0002906310619437136,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007662436773898662,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3663.0,
      "completions/mean_length": 642.2957763671875,
      "completions/mean_terminated_length": 583.49267578125,
      "completions/min_length": 71.0,
      "completions/min_terminated_length": 71.0,
      "epoch": 10.139982502187227,
      "grad_norm": 0.11767578125,
      "learning_rate": 1e-06,
      "loss": -0.004,
      "num_tokens": 631558162.0,
      "reward": 0.551339328289032,
      "reward_std": 0.1965171843767166,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 1085
    },
    {
      "clip_ratio/high_max": 0.0019993139376310864,
      "clip_ratio/high_mean": 0.0006707254733555601,
      "clip_ratio/low_mean": 0.00036152725783722417,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010322527396056103,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3234.0,
      "completions/mean_length": 549.7199096679688,
      "completions/mean_terminated_length": 521.7964477539062,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 10.149314668999708,
      "grad_norm": 0.1455078125,
      "learning_rate": 1e-06,
      "loss": -0.0098,
      "num_tokens": 632105591.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.2587401866912842,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 1086
    },
    {
      "clip_ratio/high_max": 0.0015812696365173906,
      "clip_ratio/high_mean": 0.00045869233645134955,
      "clip_ratio/low_mean": 0.0003219428009515468,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007806351241015363,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2300.0,
      "completions/mean_length": 580.7053833007812,
      "completions/mean_terminated_length": 532.9864501953125,
      "completions/min_length": 53.0,
      "completions/min_terminated_length": 53.0,
      "epoch": 10.15864683581219,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0118,
      "num_tokens": 632671839.0,
      "reward": 0.578125,
      "reward_std": 0.1695060133934021,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 1087
    },
    {
      "clip_ratio/high_max": 0.0015989276325854007,
      "clip_ratio/high_mean": 0.00047871218202999444,
      "clip_ratio/low_mean": 0.00033596853643302893,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008146807094817632,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3427.0,
      "completions/mean_length": 660.989990234375,
      "completions/mean_terminated_length": 586.5712280273438,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 10.167979002624673,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0011,
      "num_tokens": 633271286.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.2019711583852768,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 1088
    },
    {
      "clip_ratio/high_max": 0.001786749296115886,
      "clip_ratio/high_mean": 0.0005251097479685995,
      "clip_ratio/low_mean": 0.00035115719413170154,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008762669303905568,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4039.0,
      "completions/mean_length": 701.9520263671875,
      "completions/mean_terminated_length": 624.4622802734375,
      "completions/min_length": 8.0,
      "completions/min_terminated_length": 8.0,
      "epoch": 10.177311169437154,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0053,
      "num_tokens": 633909875.0,
      "reward": 0.5167410969734192,
      "reward_std": 0.2281925082206726,
      "rewards/verify_math_reward/mean": 0.5167410969734192,
      "rewards/verify_math_reward/std": 0.4999987483024597,
      "step": 1089
    },
    {
      "clip_ratio/high_max": 0.001676805290117045,
      "clip_ratio/high_mean": 0.0004657145470901014,
      "clip_ratio/low_mean": 0.00036184746477374574,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008275620139102102,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2841.0,
      "completions/mean_length": 570.3795166015625,
      "completions/mean_terminated_length": 522.5203857421875,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 10.186643336249636,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": -0.0132,
      "num_tokens": 634457207.0,
      "reward": 0.629464328289032,
      "reward_std": 0.20693056285381317,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4832179844379425,
      "step": 1090
    },
    {
      "clip_ratio/high_max": 0.0014925350005796645,
      "clip_ratio/high_mean": 0.00047520343218820926,
      "clip_ratio/low_mean": 0.0003693925566494727,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008445960006611131,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3561.0,
      "completions/mean_length": 596.4241333007812,
      "completions/mean_terminated_length": 520.6066284179688,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 10.195975503062117,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0001,
      "num_tokens": 634997931.0,
      "reward": 0.625,
      "reward_std": 0.22184264659881592,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1091
    },
    {
      "clip_ratio/high_max": 0.0015315620985347778,
      "clip_ratio/high_mean": 0.00043522461805878265,
      "clip_ratio/low_mean": 0.0003758994764666568,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008111240995276603,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2597.0,
      "completions/mean_length": 654.2835083007812,
      "completions/mean_terminated_length": 599.653076171875,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 10.2053076698746,
      "grad_norm": 0.1142578125,
      "learning_rate": 1e-06,
      "loss": 0.0098,
      "num_tokens": 635618721.0,
      "reward": 0.4720982313156128,
      "reward_std": 0.20272116363048553,
      "rewards/verify_math_reward/mean": 0.4720982015132904,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 1092
    },
    {
      "clip_ratio/high_max": 0.001533907387056388,
      "clip_ratio/high_mean": 0.0004846340226549728,
      "clip_ratio/low_mean": 0.0003948526127715013,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008794866416792502,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3801.0,
      "completions/mean_length": 619.5390625,
      "completions/mean_terminated_length": 572.3472900390625,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 10.21463983668708,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.0032,
      "num_tokens": 636207868.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.22097823023796082,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763562679291,
      "step": 1093
    },
    {
      "clip_ratio/high_max": 0.0015355293689935934,
      "clip_ratio/high_mean": 0.00046355218569260614,
      "clip_ratio/low_mean": 0.00033559102257640916,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007991432094058837,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3785.0,
      "completions/mean_length": 653.614990234375,
      "completions/mean_terminated_length": 575.0216674804688,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 10.223972003499563,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": -0.0013,
      "num_tokens": 636808171.0,
      "reward": 0.4453125298023224,
      "reward_std": 0.21485699713230133,
      "rewards/verify_math_reward/mean": 0.4453125,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 1094
    },
    {
      "clip_ratio/high_max": 0.0013949023050372489,
      "clip_ratio/high_mean": 0.0004000454498509498,
      "clip_ratio/low_mean": 0.00029239217974463827,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006924376311872038,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2472.0,
      "completions/mean_length": 616.4921875,
      "completions/mean_terminated_length": 557.249755859375,
      "completions/min_length": 34.0,
      "completions/min_terminated_length": 34.0,
      "epoch": 10.233304170312044,
      "grad_norm": 0.115234375,
      "learning_rate": 1e-06,
      "loss": 0.0144,
      "num_tokens": 637383084.0,
      "reward": 0.5301339626312256,
      "reward_std": 0.18734757602214813,
      "rewards/verify_math_reward/mean": 0.5301339030265808,
      "rewards/verify_math_reward/std": 0.49936985969543457,
      "step": 1095
    },
    {
      "clip_ratio/high_max": 0.0017311158208030974,
      "clip_ratio/high_mean": 0.0005165576781109849,
      "clip_ratio/low_mean": 0.00030333791505654517,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008198955838452093,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3716.0,
      "completions/mean_length": 619.5379638671875,
      "completions/mean_terminated_length": 580.3002319335938,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 10.242636337124527,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": -0.0014,
      "num_tokens": 637987990.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.20557299256324768,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796950817108154,
      "step": 1096
    },
    {
      "clip_ratio/high_max": 0.0017144478551927023,
      "clip_ratio/high_mean": 0.0004727702116724686,
      "clip_ratio/low_mean": 0.00032949393676062755,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008022641375191597,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3412.0,
      "completions/mean_length": 608.5960083007812,
      "completions/mean_terminated_length": 545.1885986328125,
      "completions/min_length": 76.0,
      "completions/min_terminated_length": 76.0,
      "epoch": 10.251968503937007,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0197,
      "num_tokens": 638554236.0,
      "reward": 0.5703125,
      "reward_std": 0.20962940156459808,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 1097
    },
    {
      "clip_ratio/high_max": 0.0018896704732469516,
      "clip_ratio/high_mean": 0.0005906688852519437,
      "clip_ratio/low_mean": 0.0003481938667846407,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009388627513544634,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3750.0,
      "completions/mean_length": 643.8225708007812,
      "completions/mean_terminated_length": 569.0319213867188,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "epoch": 10.26130067074949,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 639139445.0,
      "reward": 0.574776828289032,
      "reward_std": 0.2286478579044342,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 1098
    },
    {
      "clip_ratio/high_max": 0.0019207091681892052,
      "clip_ratio/high_mean": 0.0006169301102545433,
      "clip_ratio/low_mean": 0.0003355052323286145,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009524353226879612,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3902.0,
      "completions/mean_length": 599.1785888671875,
      "completions/mean_terminated_length": 547.6964721679688,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 10.27063283756197,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.018,
      "num_tokens": 639716381.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.21350152790546417,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 1099
    },
    {
      "clip_ratio/high_max": 0.0016598080492258305,
      "clip_ratio/high_mean": 0.0004825434170925291,
      "clip_ratio/low_mean": 0.00027872542761997465,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007612688468725537,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4058.0,
      "completions/mean_length": 620.1328125,
      "completions/mean_terminated_length": 548.8735961914062,
      "completions/min_length": 63.0,
      "completions/min_terminated_length": 63.0,
      "epoch": 10.279965004374453,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0022,
      "num_tokens": 640299748.0,
      "reward": 0.5301339626312256,
      "reward_std": 0.19238336384296417,
      "rewards/verify_math_reward/mean": 0.5301339030265808,
      "rewards/verify_math_reward/std": 0.49936985969543457,
      "step": 1100
    },
    {
      "clip_ratio/high_max": 0.0015342075803346233,
      "clip_ratio/high_mean": 0.0004805250027857255,
      "clip_ratio/low_mean": 0.00031075992865226,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007912849309832382,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2281.0,
      "completions/mean_length": 600.6428833007812,
      "completions/mean_terminated_length": 549.1823120117188,
      "completions/min_length": 78.0,
      "completions/min_terminated_length": 78.0,
      "epoch": 10.289297171186934,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0002,
      "num_tokens": 640874612.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.19208984076976776,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943605065345764,
      "step": 1101
    },
    {
      "clip_ratio/high_max": 0.0016850085739861242,
      "clip_ratio/high_mean": 0.000539821442430366,
      "clip_ratio/low_mean": 0.0002855800453289703,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008254014892372652,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3895.0,
      "completions/mean_length": 627.1217041015625,
      "completions/mean_terminated_length": 580.0328369140625,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 10.298629337999417,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0051,
      "num_tokens": 641480009.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.20583511888980865,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 1102
    },
    {
      "clip_ratio/high_max": 0.00164590566419065,
      "clip_ratio/high_mean": 0.0004613075368524733,
      "clip_ratio/low_mean": 0.0003890542811859632,
      "clip_ratio/low_min": 8.540584531147033e-06,
      "clip_ratio/region_mean": 0.0008503618082613684,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3637.0,
      "completions/mean_length": 617.1473388671875,
      "completions/mean_terminated_length": 553.8954467773438,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 10.307961504811898,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": -0.0014,
      "num_tokens": 642060453.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.20872969925403595,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763264656067,
      "step": 1103
    },
    {
      "clip_ratio/high_max": 0.0013244973933979054,
      "clip_ratio/high_mean": 0.0003600586894663138,
      "clip_ratio/low_mean": 0.00028554810154446386,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006456067894760054,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3553.0,
      "completions/mean_length": 656.9788208007812,
      "completions/mean_terminated_length": 598.4256591796875,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 10.31729367162438,
      "grad_norm": 0.10693359375,
      "learning_rate": 1e-06,
      "loss": 0.0101,
      "num_tokens": 642672010.0,
      "reward": 0.5033482313156128,
      "reward_std": 0.18760831654071808,
      "rewards/verify_math_reward/mean": 0.5033482313156128,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 1104
    },
    {
      "clip_ratio/high_max": 0.001573255103721749,
      "clip_ratio/high_mean": 0.0004818824679659883,
      "clip_ratio/low_mean": 0.00032442863539472455,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008063110954026342,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3687.0,
      "completions/mean_length": 645.6529541015625,
      "completions/mean_terminated_length": 590.885498046875,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 10.326625838436861,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0147,
      "num_tokens": 643277859.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.22920215129852295,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 1105
    },
    {
      "clip_ratio/high_max": 0.0015616901346220402,
      "clip_ratio/high_mean": 0.0004561532878142316,
      "clip_ratio/low_mean": 0.00028561434965013177,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007417676533805206,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2398.0,
      "completions/mean_length": 655.6920166015625,
      "completions/mean_terminated_length": 577.1461181640625,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 10.335958005249344,
      "grad_norm": 0.11767578125,
      "learning_rate": 1e-06,
      "loss": 0.0161,
      "num_tokens": 643868215.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.2031298130750656,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 1106
    },
    {
      "clip_ratio/high_max": 0.0016778797153165215,
      "clip_ratio/high_mean": 0.000509412726046321,
      "clip_ratio/low_mean": 0.0002634687833733551,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007728815107839182,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1906.0,
      "completions/mean_length": 555.0067138671875,
      "completions/mean_terminated_length": 506.9389343261719,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 10.345290172061826,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0158,
      "num_tokens": 644402933.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.19061599671840668,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924395978450775,
      "step": 1107
    },
    {
      "clip_ratio/high_max": 0.0017703265029922477,
      "clip_ratio/high_mean": 0.0005150289084667747,
      "clip_ratio/low_mean": 0.00039229383162364684,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009073227447515819,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2841.0,
      "completions/mean_length": 621.4542846679688,
      "completions/mean_terminated_length": 550.2221069335938,
      "completions/min_length": 86.0,
      "completions/min_terminated_length": 86.0,
      "epoch": 10.354622338874307,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0087,
      "num_tokens": 644969940.0,
      "reward": 0.535714328289032,
      "reward_std": 0.23446954786777496,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 1108
    },
    {
      "clip_ratio/high_max": 0.0018161127554776613,
      "clip_ratio/high_mean": 0.0005660896570134355,
      "clip_ratio/low_mean": 0.0002896276710089296,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008557173277949914,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3597.0,
      "completions/mean_length": 570.9642944335938,
      "completions/mean_terminated_length": 535.197265625,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 10.36395450568679,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0058,
      "num_tokens": 645532052.0,
      "reward": 0.59375,
      "reward_std": 0.22988361120224,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 1109
    },
    {
      "clip_ratio/high_max": 0.0016176943408936495,
      "clip_ratio/high_mean": 0.00042381206822028616,
      "clip_ratio/low_mean": 0.0002469557192625871,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006707677830490866,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2356.0,
      "completions/mean_length": 620.8381958007812,
      "completions/mean_terminated_length": 557.6533813476562,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 10.37328667249927,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0146,
      "num_tokens": 646107883.0,
      "reward": 0.535714328289032,
      "reward_std": 0.18388886749744415,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 1110
    },
    {
      "clip_ratio/high_max": 0.0016423155229858821,
      "clip_ratio/high_mean": 0.0004701535292497283,
      "clip_ratio/low_mean": 0.0003223942969725613,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007925478212200687,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3673.0,
      "completions/mean_length": 629.552490234375,
      "completions/mean_terminated_length": 570.5323486328125,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 10.382618839311753,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0128,
      "num_tokens": 646684274.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.19877281785011292,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 1111
    },
    {
      "clip_ratio/high_max": 0.002033679169471725,
      "clip_ratio/high_mean": 0.0007057901575535652,
      "clip_ratio/low_mean": 0.00032785739153951,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001033647544318228,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4072.0,
      "completions/mean_length": 618.4654541015625,
      "completions/mean_terminated_length": 551.2092895507812,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 10.391951006124234,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0026,
      "num_tokens": 647254307.0,
      "reward": 0.6361607313156128,
      "reward_std": 0.23158451914787292,
      "rewards/verify_math_reward/mean": 0.6361607313156128,
      "rewards/verify_math_reward/std": 0.4813718795776367,
      "step": 1112
    },
    {
      "clip_ratio/high_max": 0.0013961403988105303,
      "clip_ratio/high_mean": 0.0003436762935962179,
      "clip_ratio/low_mean": 0.00026574405274004675,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000609420357250201,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3937.0,
      "completions/mean_length": 644.7701416015625,
      "completions/mean_terminated_length": 589.9886474609375,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 10.401283172936717,
      "grad_norm": 0.115234375,
      "learning_rate": 1e-06,
      "loss": 0.0117,
      "num_tokens": 647874429.0,
      "reward": 0.515625,
      "reward_std": 0.1490258425474167,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 1113
    },
    {
      "clip_ratio/high_max": 0.001803381193894893,
      "clip_ratio/high_mean": 0.0005605701894637605,
      "clip_ratio/low_mean": 0.0003618182871605313,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009223884499078849,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2038.0,
      "completions/mean_length": 557.0870971679688,
      "completions/mean_terminated_length": 533.229248046875,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 10.410615339749198,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": -0.0009,
      "num_tokens": 648443371.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.2209436297416687,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 1114
    },
    {
      "clip_ratio/high_max": 0.0018055061482300516,
      "clip_ratio/high_mean": 0.0005517320200851827,
      "clip_ratio/low_mean": 0.00031505655988439685,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008667885795148322,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3865.0,
      "completions/mean_length": 625.1395263671875,
      "completions/mean_terminated_length": 578.0238037109375,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 10.41994750656168,
      "grad_norm": 0.11962890625,
      "learning_rate": 1e-06,
      "loss": -0.0165,
      "num_tokens": 649033640.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.2077532857656479,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161845445632935,
      "step": 1115
    },
    {
      "clip_ratio/high_max": 0.0013779688324575545,
      "clip_ratio/high_mean": 0.00037272801978360803,
      "clip_ratio/low_mean": 0.00032349185505609057,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006962198685869225,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2630.0,
      "completions/mean_length": 606.9520263671875,
      "completions/mean_terminated_length": 551.5703125,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 10.429279673374161,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": -0.0106,
      "num_tokens": 649612381.0,
      "reward": 0.5011160969734192,
      "reward_std": 0.19948844611644745,
      "rewards/verify_math_reward/mean": 0.5011160969734192,
      "rewards/verify_math_reward/std": 0.5002779960632324,
      "step": 1116
    },
    {
      "clip_ratio/high_max": 0.0016746436685934896,
      "clip_ratio/high_mean": 0.0005172538121769321,
      "clip_ratio/low_mean": 0.00039144150798620103,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009086953277801513,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3014.0,
      "completions/mean_length": 567.3158569335938,
      "completions/mean_terminated_length": 543.5269775390625,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "epoch": 10.438611840186644,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0016,
      "num_tokens": 650176008.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.2127426713705063,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 1117
    },
    {
      "clip_ratio/high_max": 0.001711494096525712,
      "clip_ratio/high_mean": 0.0005152216519945796,
      "clip_ratio/low_mean": 0.0002970204674284105,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008122421086227405,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2356.0,
      "completions/mean_length": 598.5167846679688,
      "completions/mean_terminated_length": 567.0078735351562,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 10.447944006999125,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": 0.0038,
      "num_tokens": 650770783.0,
      "reward": 0.543526828289032,
      "reward_std": 0.2141755372285843,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 1118
    },
    {
      "clip_ratio/high_max": 0.0015689976444264175,
      "clip_ratio/high_mean": 0.00046198784798434644,
      "clip_ratio/low_mean": 0.00041180884591085487,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000873796710948227,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3650.0,
      "completions/mean_length": 651.0245971679688,
      "completions/mean_terminated_length": 576.3899536132812,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 10.457276173811607,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": -0.0086,
      "num_tokens": 651369429.0,
      "reward": 0.5055803656578064,
      "reward_std": 0.21568326652050018,
      "rewards/verify_math_reward/mean": 0.5055803656578064,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 1119
    },
    {
      "clip_ratio/high_max": 0.0017002915546981967,
      "clip_ratio/high_mean": 0.0005153451375008444,
      "clip_ratio/low_mean": 0.00025879465385969525,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007741397971585684,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1635.0,
      "completions/mean_length": 598.7511596679688,
      "completions/mean_terminated_length": 551.2771606445312,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 10.466608340624088,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0083,
      "num_tokens": 651948134.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.1849854290485382,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791125416755676,
      "step": 1120
    },
    {
      "clip_ratio/high_max": 0.0015437259207828902,
      "clip_ratio/high_mean": 0.000483118055626619,
      "clip_ratio/low_mean": 0.00033506975671571126,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008181878238247009,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3398.0,
      "completions/mean_length": 609.5569458007812,
      "completions/mean_terminated_length": 574.1814575195312,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 10.47594050743657,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0118,
      "num_tokens": 652548409.0,
      "reward": 0.551339328289032,
      "reward_std": 0.19268685579299927,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 1121
    },
    {
      "clip_ratio/high_max": 0.001450056854991999,
      "clip_ratio/high_mean": 0.0005254861473531491,
      "clip_ratio/low_mean": 0.00033613819505262654,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008616243471806229,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2954.0,
      "completions/mean_length": 593.6217041015625,
      "completions/mean_terminated_length": 529.9420166015625,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 10.485272674249051,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.014,
      "num_tokens": 653095390.0,
      "reward": 0.6272321939468384,
      "reward_std": 0.2109421044588089,
      "rewards/verify_math_reward/mean": 0.6272321343421936,
      "rewards/verify_math_reward/std": 0.4838111698627472,
      "step": 1122
    },
    {
      "clip_ratio/high_max": 0.0016598611282461206,
      "clip_ratio/high_mean": 0.0005630239687661742,
      "clip_ratio/low_mean": 0.0003776657865728339,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000940689756134816,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4016.0,
      "completions/mean_length": 627.6171875,
      "completions/mean_terminated_length": 560.5380859375,
      "completions/min_length": 13.0,
      "completions/min_terminated_length": 13.0,
      "epoch": 10.494604841061534,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0064,
      "num_tokens": 653672199.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.21845951676368713,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 1123
    },
    {
      "clip_ratio/high_max": 0.0014626084221163183,
      "clip_ratio/high_mean": 0.0003792729844462883,
      "clip_ratio/low_mean": 0.00029118623911017494,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006704592176447477,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3778.0,
      "completions/mean_length": 619.7824096679688,
      "completions/mean_terminated_length": 552.5517578125,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 10.503937007874015,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0103,
      "num_tokens": 654243796.0,
      "reward": 0.5379464626312256,
      "reward_std": 0.17900507152080536,
      "rewards/verify_math_reward/mean": 0.5379464030265808,
      "rewards/verify_math_reward/std": 0.4988364577293396,
      "step": 1124
    },
    {
      "clip_ratio/high_max": 0.0016729121289245086,
      "clip_ratio/high_mean": 0.000553323600342992,
      "clip_ratio/low_mean": 0.0002776794560759299,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008310030671054847,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4066.0,
      "completions/mean_length": 582.513427734375,
      "completions/mean_terminated_length": 542.8577880859375,
      "completions/min_length": 86.0,
      "completions/min_terminated_length": 86.0,
      "epoch": 10.513269174686497,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0063,
      "num_tokens": 654804992.0,
      "reward": 0.590401828289032,
      "reward_std": 0.20272116363048553,
      "rewards/verify_math_reward/mean": 0.5904017686843872,
      "rewards/verify_math_reward/std": 0.49203425645828247,
      "step": 1125
    },
    {
      "clip_ratio/high_max": 0.0017559255848027533,
      "clip_ratio/high_mean": 0.0005251845698239777,
      "clip_ratio/low_mean": 0.00035391080314184364,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000879095366144611,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2949.0,
      "completions/mean_length": 567.1171875,
      "completions/mean_terminated_length": 523.25537109375,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 10.52260134149898,
      "grad_norm": 0.1435546875,
      "learning_rate": 1e-06,
      "loss": 0.0166,
      "num_tokens": 655373857.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.2204137146472931,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 1126
    },
    {
      "clip_ratio/high_max": 0.0015505834790019435,
      "clip_ratio/high_mean": 0.0004221622829163607,
      "clip_ratio/low_mean": 0.0002862095540194787,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00070837183511685,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2665.0,
      "completions/mean_length": 560.2756958007812,
      "completions/mean_terminated_length": 516.3287963867188,
      "completions/min_length": 48.0,
      "completions/min_terminated_length": 48.0,
      "epoch": 10.531933508311461,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0052,
      "num_tokens": 655922648.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.18915992975234985,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 1127
    },
    {
      "clip_ratio/high_max": 0.0017083090233427356,
      "clip_ratio/high_mean": 0.0005162691450095735,
      "clip_ratio/low_mean": 0.00035800668763386057,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008742758409425733,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3790.0,
      "completions/mean_length": 585.3538208007812,
      "completions/mean_terminated_length": 513.381591796875,
      "completions/min_length": 51.0,
      "completions/min_terminated_length": 51.0,
      "epoch": 10.541265675123944,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0001,
      "num_tokens": 656474885.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.21237428486347198,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606892466545105,
      "step": 1128
    },
    {
      "clip_ratio/high_max": 0.0015053191436891211,
      "clip_ratio/high_mean": 0.0004785692044606549,
      "clip_ratio/low_mean": 0.0003047558732305333,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007833250847397721,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2637.0,
      "completions/mean_length": 570.0848388671875,
      "completions/mean_terminated_length": 526.2598876953125,
      "completions/min_length": 81.0,
      "completions/min_terminated_length": 81.0,
      "epoch": 10.550597841936424,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0051,
      "num_tokens": 657029409.0,
      "reward": 0.546875,
      "reward_std": 0.19445420801639557,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 1129
    },
    {
      "clip_ratio/high_max": 0.0017718157059789519,
      "clip_ratio/high_mean": 0.0005742348662352015,
      "clip_ratio/low_mean": 0.00030858243007969577,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008828172940411605,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3093.0,
      "completions/mean_length": 593.9442138671875,
      "completions/mean_terminated_length": 570.3348388671875,
      "completions/min_length": 82.0,
      "completions/min_terminated_length": 82.0,
      "epoch": 10.559930008748907,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": -0.0005,
      "num_tokens": 657623919.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.22950340807437897,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.4907552897930145,
      "step": 1130
    },
    {
      "clip_ratio/high_max": 0.0017404933114448795,
      "clip_ratio/high_mean": 0.0005020621056246455,
      "clip_ratio/low_mean": 0.0002497227479807407,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007517848521274573,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4015.0,
      "completions/mean_length": 604.34375,
      "completions/mean_terminated_length": 540.8590698242188,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 10.569262175561388,
      "grad_norm": 0.11865234375,
      "learning_rate": 1e-06,
      "loss": -0.0015,
      "num_tokens": 658180339.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.18994230031967163,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 1131
    },
    {
      "clip_ratio/high_max": 0.0017064755766114104,
      "clip_ratio/high_mean": 0.0004532395195155914,
      "clip_ratio/low_mean": 0.0003917174688012892,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008449569822914782,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3412.0,
      "completions/mean_length": 652.9967041015625,
      "completions/mean_terminated_length": 606.2590942382812,
      "completions/min_length": 30.0,
      "completions/min_terminated_length": 30.0,
      "epoch": 10.57859434237387,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0161,
      "num_tokens": 658810120.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.21872234344482422,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715369939804077,
      "step": 1132
    },
    {
      "clip_ratio/high_max": 0.0016609421973043936,
      "clip_ratio/high_mean": 0.0004729513090069304,
      "clip_ratio/low_mean": 0.0002798828978711754,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007528341966462904,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4023.0,
      "completions/mean_length": 619.4006958007812,
      "completions/mean_terminated_length": 572.20703125,
      "completions/min_length": 74.0,
      "completions/min_terminated_length": 74.0,
      "epoch": 10.587926509186351,
      "grad_norm": 0.11962890625,
      "learning_rate": 1e-06,
      "loss": 0.0059,
      "num_tokens": 659414871.0,
      "reward": 0.574776828289032,
      "reward_std": 0.1935127079486847,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 1133
    },
    {
      "clip_ratio/high_max": 0.0017192801878991304,
      "clip_ratio/high_mean": 0.0005340224756764655,
      "clip_ratio/low_mean": 0.0004510266913939631,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009850491751421941,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2074.0,
      "completions/mean_length": 628.8381958007812,
      "completions/mean_terminated_length": 557.7574462890625,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 10.597258675998834,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0192,
      "num_tokens": 659994414.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.23657643795013428,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 1134
    },
    {
      "clip_ratio/high_max": 0.0017060625768863247,
      "clip_ratio/high_mean": 0.0004993932670913637,
      "clip_ratio/low_mean": 0.00043186435993902705,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000931257621232362,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2884.0,
      "completions/mean_length": 550.6953125,
      "completions/mean_terminated_length": 530.8002319335938,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 10.606590842811315,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0047,
      "num_tokens": 660552565.0,
      "reward": 0.6261160969734192,
      "reward_std": 0.2282680869102478,
      "rewards/verify_math_reward/mean": 0.6261160969734192,
      "rewards/verify_math_reward/std": 0.48410359025001526,
      "step": 1135
    },
    {
      "clip_ratio/high_max": 0.0017179808710352518,
      "clip_ratio/high_mean": 0.0005433138130683801,
      "clip_ratio/low_mean": 0.0003254884570651484,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008688022735441336,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2830.0,
      "completions/mean_length": 581.4598388671875,
      "completions/mean_terminated_length": 541.7923583984375,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 10.615923009623797,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0065,
      "num_tokens": 661120321.0,
      "reward": 0.598214328289032,
      "reward_std": 0.20012575387954712,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 1136
    },
    {
      "clip_ratio/high_max": 0.00196997782222752,
      "clip_ratio/high_mean": 0.0005800461913167965,
      "clip_ratio/low_mean": 0.0003350745846546488,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009151207609647827,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2226.0,
      "completions/mean_length": 580.7098388671875,
      "completions/mean_terminated_length": 537.0169677734375,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 10.625255176436278,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0019,
      "num_tokens": 661692381.0,
      "reward": 0.551339328289032,
      "reward_std": 0.21684511005878448,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 1137
    },
    {
      "clip_ratio/high_max": 0.0016381486402679002,
      "clip_ratio/high_mean": 0.0005049274443535978,
      "clip_ratio/low_mean": 0.00031980725100311247,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008247346995631233,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3929.0,
      "completions/mean_length": 602.8080444335938,
      "completions/mean_terminated_length": 547.3605346679688,
      "completions/min_length": 76.0,
      "completions/min_terminated_length": 76.0,
      "epoch": 10.63458734324876,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0045,
      "num_tokens": 662267089.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.22112908959388733,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 1138
    },
    {
      "clip_ratio/high_max": 0.001646062865802378,
      "clip_ratio/high_mean": 0.0004756888117753988,
      "clip_ratio/low_mean": 0.00031733140269807336,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000793020223682106,
      "completions/clipped_ratio": 0.0033482142857143016,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2243.0,
      "completions/mean_length": 551.5379638671875,
      "completions/mean_terminated_length": 539.6304931640625,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 10.643919510061242,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0279,
      "num_tokens": 662826115.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.21428248286247253,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 1139
    },
    {
      "clip_ratio/high_max": 0.0018040633385680849,
      "clip_ratio/high_mean": 0.0006041449305485003,
      "clip_ratio/low_mean": 0.00030515388198182336,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009092988229895127,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2160.0,
      "completions/mean_length": 631.4319458007812,
      "completions/mean_terminated_length": 580.4246826171875,
      "completions/min_length": 91.0,
      "completions/min_terminated_length": 91.0,
      "epoch": 10.653251676873724,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0055,
      "num_tokens": 663437206.0,
      "reward": 0.535714328289032,
      "reward_std": 0.23401953279972076,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 1140
    },
    {
      "clip_ratio/high_max": 0.0017425021833332721,
      "clip_ratio/high_mean": 0.0005442006226985541,
      "clip_ratio/low_mean": 0.0003284790451516528,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008726796668270254,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4041.0,
      "completions/mean_length": 656.2589721679688,
      "completions/mean_terminated_length": 593.7181396484375,
      "completions/min_length": 97.0,
      "completions/min_terminated_length": 97.0,
      "epoch": 10.662583843686207,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0078,
      "num_tokens": 664043614.0,
      "reward": 0.527901828289032,
      "reward_std": 0.2386789470911026,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 1141
    },
    {
      "clip_ratio/high_max": 0.0021527270846490865,
      "clip_ratio/high_mean": 0.0006683511567189271,
      "clip_ratio/low_mean": 0.00030635680445811886,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009747079675435089,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3337.0,
      "completions/mean_length": 608.411865234375,
      "completions/mean_terminated_length": 561.0690307617188,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 10.671916010498688,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0025,
      "num_tokens": 664634191.0,
      "reward": 0.606026828289032,
      "reward_std": 0.23033711314201355,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 1142
    },
    {
      "clip_ratio/high_max": 0.001847786519647343,
      "clip_ratio/high_mean": 0.0005114170812703378,
      "clip_ratio/low_mean": 0.00032902276154800347,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000840439839521423,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2231.0,
      "completions/mean_length": 599.5949096679688,
      "completions/mean_terminated_length": 536.0238647460938,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 10.68124817731117,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0136,
      "num_tokens": 665204764.0,
      "reward": 0.566964328289032,
      "reward_std": 0.18960639834403992,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 1143
    },
    {
      "clip_ratio/high_max": 0.0016712147426005686,
      "clip_ratio/high_mean": 0.0005301716157646297,
      "clip_ratio/low_mean": 0.000316820767579884,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008469923868688056,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2783.0,
      "completions/mean_length": 592.669677734375,
      "completions/mean_terminated_length": 553.128662109375,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 10.690580344123651,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 665784492.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.212375670671463,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943605065345764,
      "step": 1144
    },
    {
      "clip_ratio/high_max": 0.0014545907670253655,
      "clip_ratio/high_mean": 0.0003818193686129234,
      "clip_ratio/low_mean": 0.00033153243975903024,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007133518174669007,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3150.0,
      "completions/mean_length": 611.1105346679688,
      "completions/mean_terminated_length": 535.6111450195312,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 10.699912510936134,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": -0.0174,
      "num_tokens": 666343279.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.19050683081150055,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756613254547,
      "step": 1145
    },
    {
      "clip_ratio/high_max": 0.001637159796700871,
      "clip_ratio/high_mean": 0.0005205864244999248,
      "clip_ratio/low_mean": 0.0003697998142797587,
      "clip_ratio/low_min": 1.2084299669368193e-05,
      "clip_ratio/region_mean": 0.0008903862335500889,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3940.0,
      "completions/mean_length": 592.7098388671875,
      "completions/mean_terminated_length": 565.1248779296875,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 10.709244677748615,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0004,
      "num_tokens": 666927371.0,
      "reward": 0.53125,
      "reward_std": 0.2197069227695465,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 1146
    },
    {
      "clip_ratio/high_max": 0.001656133903452428,
      "clip_ratio/high_mean": 0.0004848967168982199,
      "clip_ratio/low_mean": 0.0003100814160461596,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007949781238494324,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3398.0,
      "completions/mean_length": 639.5792846679688,
      "completions/mean_terminated_length": 568.7186889648438,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 10.718576844561097,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0054,
      "num_tokens": 667516386.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.2043369561433792,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 1147
    },
    {
      "clip_ratio/high_max": 0.0013927572299508029,
      "clip_ratio/high_mean": 0.00039721748328247486,
      "clip_ratio/low_mean": 0.0003871739263558993,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007843914254408446,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3564.0,
      "completions/mean_length": 604.599365234375,
      "completions/mean_terminated_length": 581.0618286132812,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 10.727909011373578,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0058,
      "num_tokens": 668119219.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.2093709260225296,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 1148
    },
    {
      "clip_ratio/high_max": 0.0015744132088002516,
      "clip_ratio/high_mean": 0.0004731255261276601,
      "clip_ratio/low_mean": 0.0003359877121056343,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008091132385743549,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3999.0,
      "completions/mean_length": 622.997802734375,
      "completions/mean_terminated_length": 563.8660888671875,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 10.73724117818606,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": -0.0003,
      "num_tokens": 668701513.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.20546743273735046,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761127948761,
      "step": 1149
    },
    {
      "clip_ratio/high_max": 0.0016613937586953398,
      "clip_ratio/high_mean": 0.00046575758778999443,
      "clip_ratio/low_mean": 0.0003909868725600063,
      "clip_ratio/low_min": 1.093804712581914e-05,
      "clip_ratio/region_mean": 0.0008567444722302753,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3467.0,
      "completions/mean_length": 667.140625,
      "completions/mean_terminated_length": 584.8480224609375,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 10.746573344998541,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0025,
      "num_tokens": 669304535.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.24660933017730713,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 1150
    },
    {
      "clip_ratio/high_max": 0.001383524305310857,
      "clip_ratio/high_mean": 0.00037995680236235785,
      "clip_ratio/low_mean": 0.0004186753594694892,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007986321697899257,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2696.0,
      "completions/mean_length": 627.8984375,
      "completions/mean_terminated_length": 568.8502197265625,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 10.755905511811024,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0126,
      "num_tokens": 669896212.0,
      "reward": 0.494419664144516,
      "reward_std": 0.20888124406337738,
      "rewards/verify_math_reward/mean": 0.4944196343421936,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 1151
    },
    {
      "clip_ratio/high_max": 0.0018027637515842798,
      "clip_ratio/high_mean": 0.0005304419826188678,
      "clip_ratio/low_mean": 0.0002956257392270345,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008260677177531761,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2323.0,
      "completions/mean_length": 621.5357666015625,
      "completions/mean_terminated_length": 566.385498046875,
      "completions/min_length": 48.0,
      "completions/min_terminated_length": 48.0,
      "epoch": 10.765237678623505,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.0094,
      "num_tokens": 670489684.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.23788337409496307,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 1152
    },
    {
      "clip_ratio/high_max": 0.0015434565884788753,
      "clip_ratio/high_mean": 0.0005081853137198777,
      "clip_ratio/low_mean": 0.00028249833917470824,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007906836594884226,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3328.0,
      "completions/mean_length": 587.1674194335938,
      "completions/mean_terminated_length": 535.5084838867188,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 10.774569845435988,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0119,
      "num_tokens": 671053954.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.19287051260471344,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791128396987915,
      "step": 1153
    },
    {
      "clip_ratio/high_max": 0.001792408020264702,
      "clip_ratio/high_mean": 0.0005367111339182884,
      "clip_ratio/low_mean": 0.00033960035409563716,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008763114865359967,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3270.0,
      "completions/mean_length": 610.8515625,
      "completions/mean_terminated_length": 555.53173828125,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 10.783902012248468,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0016,
      "num_tokens": 671632333.0,
      "reward": 0.5959821939468384,
      "reward_std": 0.20816698670387268,
      "rewards/verify_math_reward/mean": 0.5959821343421936,
      "rewards/verify_math_reward/std": 0.490975022315979,
      "step": 1154
    },
    {
      "clip_ratio/high_max": 0.0015771444259371492,
      "clip_ratio/high_mean": 0.000503479648841676,
      "clip_ratio/low_mean": 0.00034037283899124304,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008438524901066558,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4056.0,
      "completions/mean_length": 597.609375,
      "completions/mean_terminated_length": 558.1241455078125,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 10.793234179060951,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.021,
      "num_tokens": 672214135.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.2090253233909607,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.4999600946903229,
      "step": 1155
    },
    {
      "clip_ratio/high_max": 0.0013750391290159314,
      "clip_ratio/high_mean": 0.00043364562338865653,
      "clip_ratio/low_mean": 0.0004127840682031092,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008464296993224707,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1945.0,
      "completions/mean_length": 645.0100708007812,
      "completions/mean_terminated_length": 562.186279296875,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 10.802566345873432,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0027,
      "num_tokens": 672796912.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.2171047180891037,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4982811510562897,
      "step": 1156
    },
    {
      "clip_ratio/high_max": 0.0015667758489144035,
      "clip_ratio/high_mean": 0.0004141048735846198,
      "clip_ratio/low_mean": 0.0003231997775401396,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007373046614702616,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4047.0,
      "completions/mean_length": 631.8392944335938,
      "completions/mean_terminated_length": 560.820068359375,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 10.811898512685914,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": -0.0021,
      "num_tokens": 673387832.0,
      "reward": 0.5457589626312256,
      "reward_std": 0.209066703915596,
      "rewards/verify_math_reward/mean": 0.5457589030265808,
      "rewards/verify_math_reward/std": 0.4981798231601715,
      "step": 1157
    },
    {
      "clip_ratio/high_max": 0.0016655336548865307,
      "clip_ratio/high_mean": 0.0005172209932879923,
      "clip_ratio/low_mean": 0.00022447497963185015,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007416959742840845,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2078.0,
      "completions/mean_length": 604.8616333007812,
      "completions/mean_terminated_length": 549.4467163085938,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 10.821230679498395,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 673962228.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.19982656836509705,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317117214203,
      "step": 1158
    },
    {
      "clip_ratio/high_max": 0.001596397483808687,
      "clip_ratio/high_mean": 0.0004775468332809396,
      "clip_ratio/low_mean": 0.00030511208478856133,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007826589203432377,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3767.0,
      "completions/mean_length": 566.3114013671875,
      "completions/mean_terminated_length": 498.046630859375,
      "completions/min_length": 25.0,
      "completions/min_terminated_length": 25.0,
      "epoch": 10.830562846310878,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0051,
      "num_tokens": 674485163.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.20324109494686127,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 1159
    },
    {
      "clip_ratio/high_max": 0.0016447261969005922,
      "clip_ratio/high_mean": 0.000513678448214705,
      "clip_ratio/low_mean": 0.00041539912967891723,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009290775929002848,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2496.0,
      "completions/mean_length": 593.7734375,
      "completions/mean_terminated_length": 566.1968994140625,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 10.83989501312336,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.0168,
      "num_tokens": 675082320.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.20996825397014618,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 1160
    },
    {
      "clip_ratio/high_max": 0.0016843953844727366,
      "clip_ratio/high_mean": 0.00046381006325191265,
      "clip_ratio/low_mean": 0.00033338541311422887,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007971954810273019,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2907.0,
      "completions/mean_length": 604.5167846679688,
      "completions/mean_terminated_length": 553.1132202148438,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 10.849227179935841,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0073,
      "num_tokens": 675652767.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.20682109892368317,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 1161
    },
    {
      "clip_ratio/high_max": 0.00183062973064807,
      "clip_ratio/high_mean": 0.0005302630440837675,
      "clip_ratio/low_mean": 0.00037720782142969256,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009074708586922497,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2846.0,
      "completions/mean_length": 610.3828125,
      "completions/mean_terminated_length": 567.0587768554688,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 10.858559346748324,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0121,
      "num_tokens": 676242238.0,
      "reward": 0.5055803656578064,
      "reward_std": 0.2456229329109192,
      "rewards/verify_math_reward/mean": 0.5055803656578064,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 1162
    },
    {
      "clip_ratio/high_max": 0.0014446050290644052,
      "clip_ratio/high_mean": 0.0004043962151172309,
      "clip_ratio/low_mean": 0.00040507040330339805,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008094666127362871,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2527.0,
      "completions/mean_length": 564.3605346679688,
      "completions/mean_terminated_length": 528.5264892578125,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "epoch": 10.867891513560805,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": -0.0006,
      "num_tokens": 676800721.0,
      "reward": 0.578125,
      "reward_std": 0.21327723562717438,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 1163
    },
    {
      "clip_ratio/high_max": 0.0011761088244384155,
      "clip_ratio/high_mean": 0.0003668110516628076,
      "clip_ratio/low_mean": 0.00022661833645543084,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0005934293808422808,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3396.0,
      "completions/mean_length": 657.1629638671875,
      "completions/mean_terminated_length": 582.6613159179688,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 10.877223680373287,
      "grad_norm": 0.11328125,
      "learning_rate": 1e-06,
      "loss": 0.023,
      "num_tokens": 677394051.0,
      "reward": 0.5390625,
      "reward_std": 0.16183525323867798,
      "rewards/verify_math_reward/mean": 0.5390625,
      "rewards/verify_math_reward/std": 0.4987502098083496,
      "step": 1164
    },
    {
      "clip_ratio/high_max": 0.001815230340071139,
      "clip_ratio/high_mean": 0.0006076487995869684,
      "clip_ratio/low_mean": 0.000338437375603462,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009460861729166936,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4053.0,
      "completions/mean_length": 628.4989013671875,
      "completions/mean_terminated_length": 561.4368286132812,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 10.886555847185768,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0132,
      "num_tokens": 677972770.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.22105281054973602,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938119411468506,
      "step": 1165
    },
    {
      "clip_ratio/high_max": 0.0016446989338874118,
      "clip_ratio/high_mean": 0.0005324632415977248,
      "clip_ratio/low_mean": 0.0003466120699613384,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008790752990535111,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2077.0,
      "completions/mean_length": 602.2701416015625,
      "completions/mean_terminated_length": 562.8374633789062,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 10.89588801399825,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0098,
      "num_tokens": 678559916.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.23191125690937042,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 1166
    },
    {
      "clip_ratio/high_max": 0.001686195442744065,
      "clip_ratio/high_mean": 0.0005248477732493484,
      "clip_ratio/low_mean": 0.00036475858155426977,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008896063582142233,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3829.0,
      "completions/mean_length": 584.396240234375,
      "completions/mean_terminated_length": 552.7601318359375,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 10.905220180810732,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0041,
      "num_tokens": 679138615.0,
      "reward": 0.606026828289032,
      "reward_std": 0.23090235888957977,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 1167
    },
    {
      "clip_ratio/high_max": 0.0018442560667608632,
      "clip_ratio/high_mean": 0.0004919110244827607,
      "clip_ratio/low_mean": 0.00031940933422447415,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008113203757602605,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2050.0,
      "completions/mean_length": 531.2098388671875,
      "completions/mean_terminated_length": 507.1775207519531,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 10.914552347623214,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0134,
      "num_tokens": 679681755.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.18182942271232605,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.49075523018836975,
      "step": 1168
    },
    {
      "clip_ratio/high_max": 0.0016073361293820199,
      "clip_ratio/high_mean": 0.0005071012240023265,
      "clip_ratio/low_mean": 0.00041626721326792904,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009233684304490453,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3944.0,
      "completions/mean_length": 698.6674194335938,
      "completions/mean_terminated_length": 625.0650024414062,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 10.923884514435695,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": -0.0065,
      "num_tokens": 680316033.0,
      "reward": 0.5033482313156128,
      "reward_std": 0.2425856590270996,
      "rewards/verify_math_reward/mean": 0.5033482313156128,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 1169
    },
    {
      "clip_ratio/high_max": 0.001973909056687262,
      "clip_ratio/high_mean": 0.0006501263105747057,
      "clip_ratio/low_mean": 0.0004083588496541779,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010584851579551469,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2576.0,
      "completions/mean_length": 581.9810791015625,
      "completions/mean_terminated_length": 526.2029418945312,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 10.933216681248178,
      "grad_norm": 0.146484375,
      "learning_rate": 1e-06,
      "loss": 0.0013,
      "num_tokens": 680871392.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.24243342876434326,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 1170
    },
    {
      "clip_ratio/high_max": 0.0014305145341495518,
      "clip_ratio/high_mean": 0.00043259119070171437,
      "clip_ratio/low_mean": 0.00030472648995782947,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007373176758846967,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3810.0,
      "completions/mean_length": 625.8984375,
      "completions/mean_terminated_length": 570.8174438476562,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 10.942548848060659,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0028,
      "num_tokens": 681471277.0,
      "reward": 0.4799107313156128,
      "reward_std": 0.18908463418483734,
      "rewards/verify_math_reward/mean": 0.4799107015132904,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 1171
    },
    {
      "clip_ratio/high_max": 0.0014520787253786693,
      "clip_ratio/high_mean": 0.00040996120560521376,
      "clip_ratio/low_mean": 0.0002406684872084952,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000650629697702243,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3729.0,
      "completions/mean_length": 685.1473388671875,
      "completions/mean_terminated_length": 615.2210083007812,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 10.951881014873141,
      "grad_norm": 0.1083984375,
      "learning_rate": 1e-06,
      "loss": 0.0046,
      "num_tokens": 682101825.0,
      "reward": 0.5234375,
      "reward_std": 0.17630618810653687,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 1172
    },
    {
      "clip_ratio/high_max": 0.0014668763142253738,
      "clip_ratio/high_mean": 0.0003770719254134747,
      "clip_ratio/low_mean": 0.00043069329888112406,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008077652282736381,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3896.0,
      "completions/mean_length": 678.1830444335938,
      "completions/mean_terminated_length": 623.9320068359375,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 10.961213181685622,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.016,
      "num_tokens": 682733661.0,
      "reward": 0.5100446939468384,
      "reward_std": 0.2220708727836609,
      "rewards/verify_math_reward/mean": 0.5100446343421936,
      "rewards/verify_math_reward/std": 0.5001782774925232,
      "step": 1173
    },
    {
      "clip_ratio/high_max": 0.0015102527440831182,
      "clip_ratio/high_mean": 0.00045779168567605666,
      "clip_ratio/low_mean": 0.00034387840389626945,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008016700894586393,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3937.0,
      "completions/mean_length": 633.3449096679688,
      "completions/mean_terminated_length": 598.2108154296875,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 10.970545348498105,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": -0.0034,
      "num_tokens": 683349690.0,
      "reward": 0.5100446939468384,
      "reward_std": 0.21722276508808136,
      "rewards/verify_math_reward/mean": 0.5100446343421936,
      "rewards/verify_math_reward/std": 0.5001782774925232,
      "step": 1174
    },
    {
      "clip_ratio/high_max": 0.0015930744812067132,
      "clip_ratio/high_mean": 0.0005445129036161234,
      "clip_ratio/low_mean": 0.0002570602872538075,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008015732037165435,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2080.0,
      "completions/mean_length": 567.5614013671875,
      "completions/mean_terminated_length": 539.7784423828125,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 10.979877515310585,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0077,
      "num_tokens": 683922553.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.2145892083644867,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 1175
    },
    {
      "clip_ratio/high_max": 0.00159230694953294,
      "clip_ratio/high_mean": 0.0004766566730722843,
      "clip_ratio/low_mean": 0.00037552160347331665,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008521782829120639,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3517.0,
      "completions/mean_length": 661.2064819335938,
      "completions/mean_terminated_length": 558.5574951171875,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 10.989209682123068,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": -0.0025,
      "num_tokens": 684498378.0,
      "reward": 0.574776828289032,
      "reward_std": 0.20925851166248322,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 1176
    },
    {
      "clip_ratio/high_max": 0.0014878864712954964,
      "clip_ratio/high_mean": 0.0004385699053273129,
      "clip_ratio/low_mean": 0.0003012640701172131,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007398339857900282,
      "completions/clipped_ratio": 0.011363636363636354,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3766.0,
      "completions/mean_length": 676.6278686523438,
      "completions/mean_terminated_length": 637.32470703125,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 10.998541848935549,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.0124,
      "num_tokens": 685119355.0,
      "reward": 0.4921875298023224,
      "reward_std": 0.22338652610778809,
      "rewards/verify_math_reward/mean": 0.4921875,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 1177
    },
    {
      "clip_ratio/high_max": 0.0018126432732969988,
      "clip_ratio/high_mean": 0.0004733995822334691,
      "clip_ratio/low_mean": 0.0003042564163706629,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007776560069032712,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2209.0,
      "completions/mean_length": 646.6897583007812,
      "completions/mean_terminated_length": 591.9387817382812,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "epoch": 11.009332166812483,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": -0.0006,
      "num_tokens": 685735141.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.21429386734962463,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.49828118085861206,
      "step": 1178
    },
    {
      "clip_ratio/high_max": 0.0015300237100746017,
      "clip_ratio/high_mean": 0.0004948398907345108,
      "clip_ratio/low_mean": 0.0004086324597665225,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00090347235709487,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3401.0,
      "completions/mean_length": 710.4788208007812,
      "completions/mean_terminated_length": 648.923828125,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 11.018664333624963,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": -0.0025,
      "num_tokens": 686397666.0,
      "reward": 0.527901828289032,
      "reward_std": 0.24254217743873596,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 1179
    },
    {
      "clip_ratio/high_max": 0.0016976731531030964,
      "clip_ratio/high_mean": 0.0005497295298937388,
      "clip_ratio/low_mean": 0.00038699891047144774,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009367284392283182,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2289.0,
      "completions/mean_length": 623.2176513671875,
      "completions/mean_terminated_length": 539.870849609375,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 11.027996500437446,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": -0.0028,
      "num_tokens": 686951421.0,
      "reward": 0.515625,
      "reward_std": 0.24130618572235107,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 1180
    },
    {
      "clip_ratio/high_max": 0.001765901306498563,
      "clip_ratio/high_mean": 0.0005292653304422856,
      "clip_ratio/low_mean": 0.0004168298777358359,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009460951951041352,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3862.0,
      "completions/mean_length": 626.6975708007812,
      "completions/mean_terminated_length": 551.535888671875,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 11.037328667249927,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0013,
      "num_tokens": 687521270.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.2051643431186676,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763562679291,
      "step": 1181
    },
    {
      "clip_ratio/high_max": 0.0013871519849999459,
      "clip_ratio/high_mean": 0.00043188820632167335,
      "clip_ratio/low_mean": 0.0003643261718480062,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007962143845361425,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2761.0,
      "completions/mean_length": 653.2589721679688,
      "completions/mean_terminated_length": 590.6636352539062,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 11.04666083406241,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0019,
      "num_tokens": 688127518.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.2021559327840805,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 1182
    },
    {
      "clip_ratio/high_max": 0.0020745518704643473,
      "clip_ratio/high_mean": 0.0006256531460167025,
      "clip_ratio/low_mean": 0.00028837112915880425,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009140242773355567,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2625.0,
      "completions/mean_length": 617.2980346679688,
      "completions/mean_terminated_length": 554.048828125,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 11.05599300087489,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": -0.0039,
      "num_tokens": 688704465.0,
      "reward": 0.5859375,
      "reward_std": 0.2099350392818451,
      "rewards/verify_math_reward/mean": 0.5859375,
      "rewards/verify_math_reward/std": 0.4928344786167145,
      "step": 1183
    },
    {
      "clip_ratio/high_max": 0.001705239990769769,
      "clip_ratio/high_mean": 0.0005049621718171693,
      "clip_ratio/low_mean": 0.00036079768506169785,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008657598427816993,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3778.0,
      "completions/mean_length": 668.5770263671875,
      "completions/mean_terminated_length": 590.3253173828125,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 11.065325167687373,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": -0.023,
      "num_tokens": 689317526.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.23404881358146667,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.4999600946903229,
      "step": 1184
    },
    {
      "clip_ratio/high_max": 0.0017312664949713508,
      "clip_ratio/high_mean": 0.0005299104766436358,
      "clip_ratio/low_mean": 0.0003292298105179725,
      "clip_ratio/low_min": 8.933676326705609e-06,
      "clip_ratio/region_mean": 0.0008591402920501423,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3933.0,
      "completions/mean_length": 653.6328125,
      "completions/mean_terminated_length": 583.0603637695312,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 11.074657334499854,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0123,
      "num_tokens": 689919005.0,
      "reward": 0.5,
      "reward_std": 0.23052442073822021,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5002792477607727,
      "step": 1185
    },
    {
      "clip_ratio/high_max": 0.0015408249964821152,
      "clip_ratio/high_mean": 0.0004702627782080526,
      "clip_ratio/low_mean": 0.00037055292841614573,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008408156918449095,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3983.0,
      "completions/mean_length": 644.9710083007812,
      "completions/mean_terminated_length": 590.1927490234375,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 11.083989501312336,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.008,
      "num_tokens": 690534483.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.21222272515296936,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 1186
    },
    {
      "clip_ratio/high_max": 0.0018965411545650568,
      "clip_ratio/high_mean": 0.0004936661780448048,
      "clip_ratio/low_mean": 0.0003178008994382253,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008114670790746459,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4077.0,
      "completions/mean_length": 613.9263916015625,
      "completions/mean_terminated_length": 554.6401977539062,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 11.093321668124817,
      "grad_norm": 0.11767578125,
      "learning_rate": 1e-06,
      "loss": 0.0005,
      "num_tokens": 691116193.0,
      "reward": 0.59375,
      "reward_std": 0.19407722353935242,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 1187
    },
    {
      "clip_ratio/high_max": 0.0014303534935606876,
      "clip_ratio/high_mean": 0.00045155592238188547,
      "clip_ratio/low_mean": 0.00034358008974777476,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007951360030347132,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3517.0,
      "completions/mean_length": 600.8761596679688,
      "completions/mean_terminated_length": 569.3885498046875,
      "completions/min_length": 98.0,
      "completions/min_terminated_length": 98.0,
      "epoch": 11.1026538349373,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0025,
      "num_tokens": 691721818.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.2241317480802536,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 1188
    },
    {
      "clip_ratio/high_max": 0.0016902213592402404,
      "clip_ratio/high_mean": 0.0004496746387303574,
      "clip_ratio/low_mean": 0.00028331191299457714,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007329865593419527,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2120.0,
      "completions/mean_length": 563.6785888671875,
      "completions/mean_terminated_length": 527.837646484375,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 11.11198600174978,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.0005,
      "num_tokens": 692265354.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.19948594272136688,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924396276473999,
      "step": 1189
    },
    {
      "clip_ratio/high_max": 0.0020211618048051605,
      "clip_ratio/high_mean": 0.0005771383284809417,
      "clip_ratio/low_mean": 0.0003027978863201497,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008799362194622518,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4054.0,
      "completions/mean_length": 663.1707763671875,
      "completions/mean_terminated_length": 580.7828369140625,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 11.121318168562263,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.0201,
      "num_tokens": 692868315.0,
      "reward": 0.53125,
      "reward_std": 0.19047221541404724,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 1190
    },
    {
      "clip_ratio/high_max": 0.0014844330980849918,
      "clip_ratio/high_mean": 0.0004902599914657912,
      "clip_ratio/low_mean": 0.0003573382582544582,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008475982481286337,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3828.0,
      "completions/mean_length": 614.3035888671875,
      "completions/mean_terminated_length": 559.03857421875,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 11.130650335374744,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0008,
      "num_tokens": 693438459.0,
      "reward": 0.559151828289032,
      "reward_std": 0.20636573433876038,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 1191
    },
    {
      "clip_ratio/high_max": 0.0015765076191200933,
      "clip_ratio/high_mean": 0.00040870388295388693,
      "clip_ratio/low_mean": 0.00033751574142115714,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000746219618122268,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2421.0,
      "completions/mean_length": 646.7957763671875,
      "completions/mean_terminated_length": 596.0147094726562,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 11.139982502187227,
      "grad_norm": 0.11669921875,
      "learning_rate": 1e-06,
      "loss": 0.0066,
      "num_tokens": 694067772.0,
      "reward": 0.478794664144516,
      "reward_std": 0.19764302670955658,
      "rewards/verify_math_reward/mean": 0.4787946343421936,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 1192
    },
    {
      "clip_ratio/high_max": 0.00181157861425163,
      "clip_ratio/high_mean": 0.0005747752998104261,
      "clip_ratio/low_mean": 0.00035434867038475204,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000929123970308865,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3214.0,
      "completions/mean_length": 614.7589721679688,
      "completions/mean_terminated_length": 567.5022583007812,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 11.149314668999708,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0009,
      "num_tokens": 694653468.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.24563290178775787,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 1193
    },
    {
      "clip_ratio/high_max": 0.001964186062650697,
      "clip_ratio/high_mean": 0.0005575590637363348,
      "clip_ratio/low_mean": 0.00041727745838215924,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009748365491759614,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3598.0,
      "completions/mean_length": 626.2511596679688,
      "completions/mean_terminated_length": 547.0330810546875,
      "completions/min_length": 59.0,
      "completions/min_terminated_length": 59.0,
      "epoch": 11.15864683581219,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": -0.0045,
      "num_tokens": 695217165.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.23124048113822937,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317117214203,
      "step": 1194
    },
    {
      "clip_ratio/high_max": 0.0016791548505352694,
      "clip_ratio/high_mean": 0.00042282035997232015,
      "clip_ratio/low_mean": 0.0003706209006395511,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007934412560643977,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2843.0,
      "completions/mean_length": 589.8817138671875,
      "completions/mean_terminated_length": 522.0728149414062,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 11.167979002624673,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": -0.0027,
      "num_tokens": 695778035.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.19287119805812836,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 1195
    },
    {
      "clip_ratio/high_max": 0.001689097861344635,
      "clip_ratio/high_mean": 0.0005339278081919474,
      "clip_ratio/low_mean": 0.0003026856468295591,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008366134429707017,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4086.0,
      "completions/mean_length": 657.9408569335938,
      "completions/mean_terminated_length": 575.4274291992188,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 11.177311169437154,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 696360278.0,
      "reward": 0.5546875,
      "reward_std": 0.23409047722816467,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 1196
    },
    {
      "clip_ratio/high_max": 0.001657502443777048,
      "clip_ratio/high_mean": 0.0004737180547635944,
      "clip_ratio/low_mean": 0.00027587881493218447,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007495968684452237,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3989.0,
      "completions/mean_length": 558.896240234375,
      "completions/mean_terminated_length": 523.0067138671875,
      "completions/min_length": 93.0,
      "completions/min_terminated_length": 93.0,
      "epoch": 11.186643336249636,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0053,
      "num_tokens": 696910841.0,
      "reward": 0.582589328289032,
      "reward_std": 0.19918467104434967,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.4934072494506836,
      "step": 1197
    },
    {
      "clip_ratio/high_max": 0.0012414027651175275,
      "clip_ratio/high_mean": 0.0003762090127565898,
      "clip_ratio/low_mean": 0.00029280607691362093,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006690150892154634,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3913.0,
      "completions/mean_length": 619.1027221679688,
      "completions/mean_terminated_length": 567.9139404296875,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 11.195975503062117,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": 0.0015,
      "num_tokens": 697496725.0,
      "reward": 0.546875,
      "reward_std": 0.20531155169010162,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 1198
    },
    {
      "clip_ratio/high_max": 0.0015222707679640735,
      "clip_ratio/high_mean": 0.00043158240328011743,
      "clip_ratio/low_mean": 0.00021781701445888757,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006493994064840081,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3517.0,
      "completions/mean_length": 600.0145263671875,
      "completions/mean_terminated_length": 536.4511108398438,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 11.2053076698746,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.0014,
      "num_tokens": 698064514.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.18013553321361542,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 1199
    },
    {
      "clip_ratio/high_max": 0.0013311835728018195,
      "clip_ratio/high_mean": 0.0003712672070150802,
      "clip_ratio/low_mean": 0.00038878351938365086,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007600507132110579,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3761.0,
      "completions/mean_length": 607.3527221679688,
      "completions/mean_terminated_length": 567.9774169921875,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 11.21463983668708,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0178,
      "num_tokens": 698662950.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.2123749703168869,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 1200
    },
    {
      "clip_ratio/high_max": 0.0015760056467115646,
      "clip_ratio/high_mean": 0.00045181570465047116,
      "clip_ratio/low_mean": 0.0002670606072570081,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007188763220256078,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3505.0,
      "completions/mean_length": 626.4799194335938,
      "completions/mean_terminated_length": 583.35595703125,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 11.223972003499563,
      "grad_norm": 0.11767578125,
      "learning_rate": 1e-06,
      "loss": 0.018,
      "num_tokens": 699261916.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.1813715547323227,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 1201
    },
    {
      "clip_ratio/high_max": 0.001806717382351053,
      "clip_ratio/high_mean": 0.0005310341794029227,
      "clip_ratio/low_mean": 0.0002920879667271947,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008231221463574911,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3179.0,
      "completions/mean_length": 632.8705444335938,
      "completions/mean_terminated_length": 597.7316284179688,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 11.233304170312044,
      "grad_norm": 0.1171875,
      "learning_rate": 1e-06,
      "loss": 0.0079,
      "num_tokens": 699879192.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.19805558025836945,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 1202
    },
    {
      "clip_ratio/high_max": 0.0014581408777303295,
      "clip_ratio/high_mean": 0.0004344063413554977,
      "clip_ratio/low_mean": 0.0003344224043075883,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007688287473683886,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3717.0,
      "completions/mean_length": 593.4252319335938,
      "completions/mean_terminated_length": 553.8927612304688,
      "completions/min_length": 35.0,
      "completions/min_terminated_length": 35.0,
      "epoch": 11.242636337124527,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 700456373.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.1965913474559784,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 1203
    },
    {
      "clip_ratio/high_max": 0.001748106321429077,
      "clip_ratio/high_mean": 0.0005243480889021157,
      "clip_ratio/low_mean": 0.0003121826123333449,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000836530721244344,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3366.0,
      "completions/mean_length": 616.2377319335938,
      "completions/mean_terminated_length": 565.0067749023438,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 11.251968503937007,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": 0.0154,
      "num_tokens": 701048426.0,
      "reward": 0.625,
      "reward_std": 0.20151470601558685,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1204
    },
    {
      "clip_ratio/high_max": 0.0017423929130018223,
      "clip_ratio/high_mean": 0.0004999095231141837,
      "clip_ratio/low_mean": 0.0003165828585451891,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000816492373814981,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3488.0,
      "completions/mean_length": 660.7935791015625,
      "completions/mean_terminated_length": 582.3641357421875,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 11.26130067074949,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": 0.0046,
      "num_tokens": 701668137.0,
      "reward": 0.4810267984867096,
      "reward_std": 0.20098592340946198,
      "rewards/verify_math_reward/mean": 0.4810267984867096,
      "rewards/verify_math_reward/std": 0.49991899728775024,
      "step": 1205
    },
    {
      "clip_ratio/high_max": 0.0017148759006886394,
      "clip_ratio/high_mean": 0.00047484214167070604,
      "clip_ratio/low_mean": 0.00037459873703937774,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008494408657497843,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2263.0,
      "completions/mean_length": 562.4375,
      "completions/mean_terminated_length": 498.1908874511719,
      "completions/min_length": 50.0,
      "completions/min_terminated_length": 50.0,
      "epoch": 11.27063283756197,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.006,
      "num_tokens": 702197481.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.21139857172966003,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 1206
    },
    {
      "clip_ratio/high_max": 0.0014666623719676863,
      "clip_ratio/high_mean": 0.0004838364152419672,
      "clip_ratio/low_mean": 0.00034066045623148966,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008244968639701256,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3976.0,
      "completions/mean_length": 586.6763916015625,
      "completions/mean_terminated_length": 559.0438842773438,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 11.279965004374453,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.003,
      "num_tokens": 702780599.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.21676772832870483,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 1207
    },
    {
      "clip_ratio/high_max": 0.0014182000450091437,
      "clip_ratio/high_mean": 0.000344319263831494,
      "clip_ratio/low_mean": 0.00031040950091210107,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006547287696321291,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3035.0,
      "completions/mean_length": 596.328125,
      "completions/mean_terminated_length": 548.8212890625,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 11.289297171186934,
      "grad_norm": 0.1103515625,
      "learning_rate": 1e-06,
      "loss": 0.0035,
      "num_tokens": 703358725.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.15409964323043823,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.49702703952789307,
      "step": 1208
    },
    {
      "clip_ratio/high_max": 0.0018192526422353694,
      "clip_ratio/high_mean": 0.0005398614571276994,
      "clip_ratio/low_mean": 0.00029727401306445245,
      "clip_ratio/low_min": 1.1015156815119553e-05,
      "clip_ratio/region_mean": 0.0008371354679184151,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3090.0,
      "completions/mean_length": 536.396240234375,
      "completions/mean_terminated_length": 504.3277282714844,
      "completions/min_length": 67.0,
      "completions/min_terminated_length": 67.0,
      "epoch": 11.298629337999417,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0058,
      "num_tokens": 703894256.0,
      "reward": 0.5703125,
      "reward_std": 0.19340254366397858,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 1209
    },
    {
      "clip_ratio/high_max": 0.0016637576281937072,
      "clip_ratio/high_mean": 0.0005448425899885478,
      "clip_ratio/low_mean": 0.0003061326644910878,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008509752551617566,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2822.0,
      "completions/mean_length": 532.28125,
      "completions/mean_terminated_length": 508.2561950683594,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 11.307961504811898,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0109,
      "num_tokens": 704425860.0,
      "reward": 0.6774553656578064,
      "reward_std": 0.19835910201072693,
      "rewards/verify_math_reward/mean": 0.6774553656578064,
      "rewards/verify_math_reward/std": 0.4677111804485321,
      "step": 1210
    },
    {
      "clip_ratio/high_max": 0.0015508226497331634,
      "clip_ratio/high_mean": 0.0004395264149934519,
      "clip_ratio/low_mean": 0.00027341181407791737,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007129382206585433,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3985.0,
      "completions/mean_length": 665.7611694335938,
      "completions/mean_terminated_length": 623.1254272460938,
      "completions/min_length": 12.0,
      "completions/min_terminated_length": 12.0,
      "epoch": 11.31729367162438,
      "grad_norm": 0.1123046875,
      "learning_rate": 1e-06,
      "loss": -0.0042,
      "num_tokens": 705077542.0,
      "reward": 0.5,
      "reward_std": 0.18517020344734192,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5002792477607727,
      "step": 1211
    },
    {
      "clip_ratio/high_max": 0.0018860916006815387,
      "clip_ratio/high_mean": 0.0005438715411401063,
      "clip_ratio/low_mean": 0.00027447828165350074,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008183498266589595,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4045.0,
      "completions/mean_length": 567.6752319335938,
      "completions/mean_terminated_length": 539.8931884765625,
      "completions/min_length": 70.0,
      "completions/min_terminated_length": 70.0,
      "epoch": 11.326625838436861,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": -0.0036,
      "num_tokens": 705644011.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.20298148691654205,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 1212
    },
    {
      "clip_ratio/high_max": 0.0018231462363473838,
      "clip_ratio/high_mean": 0.0006360889037750894,
      "clip_ratio/low_mean": 0.0002481379181062948,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008842268271109788,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3642.0,
      "completions/mean_length": 560.6194458007812,
      "completions/mean_terminated_length": 524.7474365234375,
      "completions/min_length": 72.0,
      "completions/min_terminated_length": 72.0,
      "epoch": 11.335958005249344,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.003,
      "num_tokens": 706186790.0,
      "reward": 0.613839328289032,
      "reward_std": 0.2141006737947464,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 1213
    },
    {
      "clip_ratio/high_max": 0.001437395154425758,
      "clip_ratio/high_mean": 0.0004377646807824931,
      "clip_ratio/low_mean": 0.0004015669476302719,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008393316211368074,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2824.0,
      "completions/mean_length": 574.2444458007812,
      "completions/mean_terminated_length": 530.47119140625,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 11.345290172061826,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0046,
      "num_tokens": 706744473.0,
      "reward": 0.53125,
      "reward_std": 0.2232327163219452,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 1214
    },
    {
      "clip_ratio/high_max": 0.0016219537110373494,
      "clip_ratio/high_mean": 0.0004899214943634433,
      "clip_ratio/low_mean": 0.00033940166224510904,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008293231612697127,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2581.0,
      "completions/mean_length": 620.4152221679688,
      "completions/mean_terminated_length": 569.2457275390625,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 11.354622338874307,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": 0.0022,
      "num_tokens": 707341573.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.19798073172569275,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 1215
    },
    {
      "clip_ratio/high_max": 0.0016301512459904188,
      "clip_ratio/high_mean": 0.0004961456893397553,
      "clip_ratio/low_mean": 0.0002186143169637944,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000714760011760518,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3414.0,
      "completions/mean_length": 641.5346069335938,
      "completions/mean_terminated_length": 562.66552734375,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 11.36395450568679,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": -0.0021,
      "num_tokens": 707916292.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.19753523170948029,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 1216
    },
    {
      "clip_ratio/high_max": 0.0016524278253200464,
      "clip_ratio/high_mean": 0.00047501923427262227,
      "clip_ratio/low_mean": 0.0003669948181368454,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008420140538873966,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4039.0,
      "completions/mean_length": 575.849365234375,
      "completions/mean_terminated_length": 540.1318969726562,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 11.37328667249927,
      "grad_norm": 0.1474609375,
      "learning_rate": 1e-06,
      "loss": -0.0038,
      "num_tokens": 708495637.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.22109587490558624,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973994791507721,
      "step": 1217
    },
    {
      "clip_ratio/high_max": 0.0015316274593715207,
      "clip_ratio/high_mean": 0.0005121717392739811,
      "clip_ratio/low_mean": 0.00032788698342756106,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008400587203141185,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3477.0,
      "completions/mean_length": 625.1506958007812,
      "completions/mean_terminated_length": 578.0350952148438,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 11.382618839311753,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0005,
      "num_tokens": 709101636.0,
      "reward": 0.5881696939468384,
      "reward_std": 0.21808859705924988,
      "rewards/verify_math_reward/mean": 0.5881696343421936,
      "rewards/verify_math_reward/std": 0.4924395978450775,
      "step": 1218
    },
    {
      "clip_ratio/high_max": 0.0015591038190905238,
      "clip_ratio/high_mean": 0.0004793016846633691,
      "clip_ratio/low_mean": 0.00037911617471309,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008584178658566088,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3903.0,
      "completions/mean_length": 599.328125,
      "completions/mean_terminated_length": 531.701904296875,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 11.391951006124234,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0133,
      "num_tokens": 709659026.0,
      "reward": 0.515625,
      "reward_std": 0.22007529437541962,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 1219
    },
    {
      "clip_ratio/high_max": 0.001727671418848331,
      "clip_ratio/high_mean": 0.0005489591512741754,
      "clip_ratio/low_mean": 0.0003577263383931495,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000906685498648585,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3732.0,
      "completions/mean_length": 567.5960083007812,
      "completions/mean_terminated_length": 543.8090209960938,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 11.401283172936717,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0042,
      "num_tokens": 710230920.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.22819000482559204,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 1220
    },
    {
      "clip_ratio/high_max": 0.0017083319780795136,
      "clip_ratio/high_mean": 0.0005138730077760556,
      "clip_ratio/low_mean": 0.0003482699106598375,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008621429242339218,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3697.0,
      "completions/mean_length": 637.671875,
      "completions/mean_terminated_length": 578.7900390625,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 11.410615339749198,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.023,
      "num_tokens": 710823234.0,
      "reward": 0.4955357313156128,
      "reward_std": 0.23082607984542847,
      "rewards/verify_math_reward/mean": 0.4955357015132904,
      "rewards/verify_math_reward/std": 0.500259280204773,
      "step": 1221
    },
    {
      "clip_ratio/high_max": 0.0018757069447019603,
      "clip_ratio/high_mean": 0.000587552998240426,
      "clip_ratio/low_mean": 0.0003630494585422639,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009506024634902133,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2193.0,
      "completions/mean_length": 603.5223388671875,
      "completions/mean_terminated_length": 564.1038208007812,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 11.41994750656168,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.015,
      "num_tokens": 711410422.0,
      "reward": 0.566964328289032,
      "reward_std": 0.2346218079328537,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 1222
    },
    {
      "clip_ratio/high_max": 0.0016131734191731084,
      "clip_ratio/high_mean": 0.0005183955978509402,
      "clip_ratio/low_mean": 0.00035145649144396884,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008698520741745597,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4063.0,
      "completions/mean_length": 698.1563110351562,
      "completions/mean_terminated_length": 632.44140625,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 11.429279673374161,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0018,
      "num_tokens": 712046442.0,
      "reward": 0.4966517984867096,
      "reward_std": 0.23724789917469025,
      "rewards/verify_math_reward/mean": 0.4966517984867096,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 1223
    },
    {
      "clip_ratio/high_max": 0.0016537298324692529,
      "clip_ratio/high_mean": 0.0005164458686977014,
      "clip_ratio/low_mean": 0.00037962824217174784,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008960741015471285,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2107.0,
      "completions/mean_length": 531.1517944335938,
      "completions/mean_terminated_length": 503.0821228027344,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 11.438611840186644,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.003,
      "num_tokens": 712577826.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.19783805310726166,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 1224
    },
    {
      "clip_ratio/high_max": 0.001743421961691638,
      "clip_ratio/high_mean": 0.0005555848567837529,
      "clip_ratio/low_mean": 0.0003264261929416534,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008820110333545017,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3422.0,
      "completions/mean_length": 610.7455444335938,
      "completions/mean_terminated_length": 571.4085693359375,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 11.447944006999125,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.0132,
      "num_tokens": 713176054.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.2167356312274933,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 1225
    },
    {
      "clip_ratio/high_max": 0.0014606723916585906,
      "clip_ratio/high_mean": 0.0004765461920896996,
      "clip_ratio/low_mean": 0.00041098473593592644,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008875309404174914,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1936.0,
      "completions/mean_length": 564.921875,
      "completions/mean_terminated_length": 533.1103515625,
      "completions/min_length": 40.0,
      "completions/min_terminated_length": 40.0,
      "epoch": 11.457276173811607,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0007,
      "num_tokens": 713725872.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.2180815488100052,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 1226
    },
    {
      "clip_ratio/high_max": 0.0020563315320032416,
      "clip_ratio/high_mean": 0.0006233833271380718,
      "clip_ratio/low_mean": 0.00030864210191339225,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009320254212070722,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2460.0,
      "completions/mean_length": 575.4642944335938,
      "completions/mean_terminated_length": 531.7062377929688,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 11.466608340624088,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0033,
      "num_tokens": 714278440.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.20400065183639526,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 1227
    },
    {
      "clip_ratio/high_max": 0.0015116178656171542,
      "clip_ratio/high_mean": 0.00046261808665803983,
      "clip_ratio/low_mean": 0.0003711675785780244,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008337856588696013,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4019.0,
      "completions/mean_length": 615.4576416015625,
      "completions/mean_terminated_length": 568.21044921875,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 11.47594050743657,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": -0.0024,
      "num_tokens": 714865338.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.22699564695358276,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 1228
    },
    {
      "clip_ratio/high_max": 0.0015121131345949834,
      "clip_ratio/high_mean": 0.0004476012652503414,
      "clip_ratio/low_mean": 0.00033094190712290583,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007785431607771898,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3422.0,
      "completions/mean_length": 591.7154541015625,
      "completions/mean_terminated_length": 515.7958984375,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 11.485272674249051,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": -0.0009,
      "num_tokens": 715389763.0,
      "reward": 0.5770089626312256,
      "reward_std": 0.19535532593727112,
      "rewards/verify_math_reward/mean": 0.5770089030265808,
      "rewards/verify_math_reward/std": 0.4943099319934845,
      "step": 1229
    },
    {
      "clip_ratio/high_max": 0.0016218743603531038,
      "clip_ratio/high_mean": 0.0005120393584547855,
      "clip_ratio/low_mean": 0.0004929801207254059,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010050194896393805,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4065.0,
      "completions/mean_length": 632.5245971679688,
      "completions/mean_terminated_length": 565.5403442382812,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 11.494604841061534,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0166,
      "num_tokens": 715978729.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.24472180008888245,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756911277771,
      "step": 1230
    },
    {
      "clip_ratio/high_max": 0.0016897195973797352,
      "clip_ratio/high_mean": 0.00047367160982503265,
      "clip_ratio/low_mean": 0.00027815549071874557,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007518271086155437,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3108.0,
      "completions/mean_length": 634.296875,
      "completions/mean_terminated_length": 571.3568115234375,
      "completions/min_length": 68.0,
      "completions/min_terminated_length": 68.0,
      "epoch": 11.503937007874015,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0042,
      "num_tokens": 716574787.0,
      "reward": 0.5055803656578064,
      "reward_std": 0.21022644639015198,
      "rewards/verify_math_reward/mean": 0.5055803656578064,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 1231
    },
    {
      "clip_ratio/high_max": 0.001779398171493085,
      "clip_ratio/high_mean": 0.000525535028828017,
      "clip_ratio/low_mean": 0.0003985766495588905,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009241116840712493,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3413.0,
      "completions/mean_length": 598.2890625,
      "completions/mean_terminated_length": 558.8115234375,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 11.513269174686497,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0039,
      "num_tokens": 717161190.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.2320656180381775,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943602085113525,
      "step": 1232
    },
    {
      "clip_ratio/high_max": 0.0019015505331481108,
      "clip_ratio/high_mean": 0.0005253872518551361,
      "clip_ratio/low_mean": 0.00030738215093606414,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008327694104082184,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4068.0,
      "completions/mean_length": 596.786865234375,
      "completions/mean_terminated_length": 537.2088623046875,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 11.52260134149898,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": -0.0068,
      "num_tokens": 717716911.0,
      "reward": 0.582589328289032,
      "reward_std": 0.1997230499982834,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.4934072494506836,
      "step": 1233
    },
    {
      "clip_ratio/high_max": 0.001570004318637075,
      "clip_ratio/high_mean": 0.0004583853195754273,
      "clip_ratio/low_mean": 0.0003396300830900145,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007980154096003389,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3448.0,
      "completions/mean_length": 658.271240234375,
      "completions/mean_terminated_length": 559.5993041992188,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 11.531933508311461,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0065,
      "num_tokens": 718295802.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.22251734137535095,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796947836875916,
      "step": 1234
    },
    {
      "clip_ratio/high_max": 0.0015217493146337802,
      "clip_ratio/high_mean": 0.00048210438262685784,
      "clip_ratio/low_mean": 0.00033898701758516836,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008210913938455633,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3704.0,
      "completions/mean_length": 686.388427734375,
      "completions/mean_terminated_length": 612.5199584960938,
      "completions/min_length": 87.0,
      "completions/min_terminated_length": 87.0,
      "epoch": 11.541265675123944,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": -0.0011,
      "num_tokens": 718921110.0,
      "reward": 0.4955357313156128,
      "reward_std": 0.2175191193819046,
      "rewards/verify_math_reward/mean": 0.4955357015132904,
      "rewards/verify_math_reward/std": 0.500259280204773,
      "step": 1235
    },
    {
      "clip_ratio/high_max": 0.001625314975171932,
      "clip_ratio/high_mean": 0.0005147203451087989,
      "clip_ratio/low_mean": 0.0003135977467536577,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008283180973194249,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2903.0,
      "completions/mean_length": 622.138427734375,
      "completions/mean_terminated_length": 566.9977416992188,
      "completions/min_length": 87.0,
      "completions/min_terminated_length": 87.0,
      "epoch": 11.550597841936424,
      "grad_norm": 0.1162109375,
      "learning_rate": 1e-06,
      "loss": -0.0049,
      "num_tokens": 719505434.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.19527865946292877,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321895837783813,
      "step": 1236
    },
    {
      "clip_ratio/high_max": 0.001830635885198717,
      "clip_ratio/high_mean": 0.0005781422328254848,
      "clip_ratio/low_mean": 0.0004298345254483138,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010079767635033932,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3734.0,
      "completions/mean_length": 593.9877319335938,
      "completions/mean_terminated_length": 546.4490966796875,
      "completions/min_length": 66.0,
      "completions/min_terminated_length": 66.0,
      "epoch": 11.559930008748907,
      "grad_norm": 0.1455078125,
      "learning_rate": 1e-06,
      "loss": 0.0252,
      "num_tokens": 720093543.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.24156899750232697,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.49702703952789307,
      "step": 1237
    },
    {
      "clip_ratio/high_max": 0.001967149275515112,
      "clip_ratio/high_mean": 0.0005794478338430054,
      "clip_ratio/low_mean": 0.00040232166224996035,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009817694872253924,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2611.0,
      "completions/mean_length": 630.9955444335938,
      "completions/mean_terminated_length": 587.9276733398438,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 11.569262175561388,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": -0.0034,
      "num_tokens": 720702059.0,
      "reward": 0.53125,
      "reward_std": 0.26404082775115967,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 1238
    },
    {
      "clip_ratio/high_max": 0.0022166906601341907,
      "clip_ratio/high_mean": 0.0006844085405646183,
      "clip_ratio/low_mean": 0.0003884082661897992,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010728167962952284,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2134.0,
      "completions/mean_length": 561.8973388671875,
      "completions/mean_terminated_length": 526.038330078125,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 11.57859434237387,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": -0.0005,
      "num_tokens": 721256103.0,
      "reward": 0.621651828289032,
      "reward_std": 0.22079460322856903,
      "rewards/verify_math_reward/mean": 0.6216517686843872,
      "rewards/verify_math_reward/std": 0.4852459728717804,
      "step": 1239
    },
    {
      "clip_ratio/high_max": 0.0014821816630501417,
      "clip_ratio/high_mean": 0.0004229704757108266,
      "clip_ratio/low_mean": 0.00023618010527570732,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006591505796222918,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3536.0,
      "completions/mean_length": 606.015625,
      "completions/mean_terminated_length": 550.6190795898438,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "epoch": 11.587926509186351,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": -0.0039,
      "num_tokens": 721836101.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.17551524937152863,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 1240
    },
    {
      "clip_ratio/high_max": 0.0016147985652423813,
      "clip_ratio/high_mean": 0.0004946439746618125,
      "clip_ratio/low_mean": 0.0003169257595345698,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008115697264656774,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3862.0,
      "completions/mean_length": 636.3214721679688,
      "completions/mean_terminated_length": 565.3941040039062,
      "completions/min_length": 90.0,
      "completions/min_terminated_length": 90.0,
      "epoch": 11.597258675998834,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0074,
      "num_tokens": 722437685.0,
      "reward": 0.5100446939468384,
      "reward_std": 0.19430406391620636,
      "rewards/verify_math_reward/mean": 0.5100446343421936,
      "rewards/verify_math_reward/std": 0.5001782774925232,
      "step": 1241
    },
    {
      "clip_ratio/high_max": 0.001336024504780653,
      "clip_ratio/high_mean": 0.0003556971034868184,
      "clip_ratio/low_mean": 0.00037930445262190915,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007350015512201935,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3995.0,
      "completions/mean_length": 628.1473388671875,
      "completions/mean_terminated_length": 577.0917358398438,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 11.606590842811315,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0066,
      "num_tokens": 723049737.0,
      "reward": 0.4765625298023224,
      "reward_std": 0.19456368684768677,
      "rewards/verify_math_reward/mean": 0.4765625,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 1242
    },
    {
      "clip_ratio/high_max": 0.0015667741399738588,
      "clip_ratio/high_mean": 0.0004944293539210776,
      "clip_ratio/low_mean": 0.0002827515141916592,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007771808786856127,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1985.0,
      "completions/mean_length": 623.9085083007812,
      "completions/mean_terminated_length": 580.7525634765625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 11.615923009623797,
      "grad_norm": 0.11279296875,
      "learning_rate": 1e-06,
      "loss": 0.011,
      "num_tokens": 723652263.0,
      "reward": 0.559151828289032,
      "reward_std": 0.2024155557155609,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 1243
    },
    {
      "clip_ratio/high_max": 0.0016037574614529149,
      "clip_ratio/high_mean": 0.0004749589256789477,
      "clip_ratio/low_mean": 0.00030728922411071835,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007822481584298657,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3717.0,
      "completions/mean_length": 580.1808471679688,
      "completions/mean_terminated_length": 556.4786376953125,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 11.625255176436278,
      "grad_norm": 0.1171875,
      "learning_rate": 1e-06,
      "loss": -0.006,
      "num_tokens": 724235737.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.18648359179496765,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644899368286,
      "step": 1244
    },
    {
      "clip_ratio/high_max": 0.0015045660693431273,
      "clip_ratio/high_mean": 0.00046128759026942134,
      "clip_ratio/low_mean": 0.00039577127654411015,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008570588602196949,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2840.0,
      "completions/mean_length": 643.0848388671875,
      "completions/mean_terminated_length": 560.21484375,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 11.63458734324876,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": -0.001,
      "num_tokens": 724813597.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.20989085733890533,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 1245
    },
    {
      "clip_ratio/high_max": 0.0018296742314305448,
      "clip_ratio/high_mean": 0.0005302263004978158,
      "clip_ratio/low_mean": 0.000352895020341748,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008831213299345109,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3971.0,
      "completions/mean_length": 625.333740234375,
      "completions/mean_terminated_length": 562.2306518554688,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 11.643919510061242,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0056,
      "num_tokens": 725396704.0,
      "reward": 0.535714328289032,
      "reward_std": 0.20174476504325867,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 1246
    },
    {
      "clip_ratio/high_max": 0.0016752165302023059,
      "clip_ratio/high_mean": 0.0004810842406186566,
      "clip_ratio/low_mean": 0.00031510455107763846,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007961887913552346,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2581.0,
      "completions/mean_length": 587.296875,
      "completions/mean_terminated_length": 531.6032104492188,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 11.653251676873724,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0103,
      "num_tokens": 725954762.0,
      "reward": 0.578125,
      "reward_std": 0.21605345606803894,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 1247
    },
    {
      "clip_ratio/high_max": 0.0018545566108514322,
      "clip_ratio/high_mean": 0.0005846017716066854,
      "clip_ratio/low_mean": 0.000446058412308048,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010306601780030178,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3776.0,
      "completions/mean_length": 621.5703125,
      "completions/mean_terminated_length": 554.374267578125,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 11.662583843686207,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 726522169.0,
      "reward": 0.5770089626312256,
      "reward_std": 0.23488323390483856,
      "rewards/verify_math_reward/mean": 0.5770089030265808,
      "rewards/verify_math_reward/std": 0.4943099319934845,
      "step": 1248
    },
    {
      "clip_ratio/high_max": 0.001839515312894946,
      "clip_ratio/high_mean": 0.0005849610543009476,
      "clip_ratio/low_mean": 0.00047545505958623835,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001060416106156481,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3943.0,
      "completions/mean_length": 617.1373291015625,
      "completions/mean_terminated_length": 561.917236328125,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 11.671916010498688,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0074,
      "num_tokens": 727102364.0,
      "reward": 0.543526828289032,
      "reward_std": 0.24833638966083527,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 1249
    },
    {
      "clip_ratio/high_max": 0.00165630233095726,
      "clip_ratio/high_mean": 0.0005054166886111489,
      "clip_ratio/low_mean": 0.0004257477132796339,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009311644053013879,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3868.0,
      "completions/mean_length": 632.325927734375,
      "completions/mean_terminated_length": 557.2861938476562,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 11.68124817731117,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0047,
      "num_tokens": 727682776.0,
      "reward": 0.5167410969734192,
      "reward_std": 0.22480645775794983,
      "rewards/verify_math_reward/mean": 0.5167410969734192,
      "rewards/verify_math_reward/std": 0.4999987483024597,
      "step": 1250
    },
    {
      "clip_ratio/high_max": 0.0013985980440338608,
      "clip_ratio/high_mean": 0.0003993640550561395,
      "clip_ratio/low_mean": 0.0003613221728073768,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007606862300235662,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3050.0,
      "completions/mean_length": 640.7824096679688,
      "completions/mean_terminated_length": 605.7237548828125,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 11.690580344123651,
      "grad_norm": 0.11865234375,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 728312045.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.20102868974208832,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 1251
    },
    {
      "clip_ratio/high_max": 0.0018681076990105794,
      "clip_ratio/high_mean": 0.0005925826236534704,
      "clip_ratio/low_mean": 0.00031510629719377903,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009076889082280104,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2492.0,
      "completions/mean_length": 592.7734375,
      "completions/mean_terminated_length": 549.2305297851562,
      "completions/min_length": 79.0,
      "completions/min_terminated_length": 79.0,
      "epoch": 11.699912510936134,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": -0.0032,
      "num_tokens": 728894554.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.21365560591220856,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 1252
    },
    {
      "clip_ratio/high_max": 0.0016980385498754913,
      "clip_ratio/high_mean": 0.0005554409699470853,
      "clip_ratio/low_mean": 0.0003138662875699083,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008693072650203248,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3484.0,
      "completions/mean_length": 629.7288208007812,
      "completions/mean_terminated_length": 578.6964721679688,
      "completions/min_length": 77.0,
      "completions/min_terminated_length": 77.0,
      "epoch": 11.709244677748615,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.004,
      "num_tokens": 729492471.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.2323242574930191,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321895837783813,
      "step": 1253
    },
    {
      "clip_ratio/high_max": 0.0015078579999681097,
      "clip_ratio/high_mean": 0.0004032950316741335,
      "clip_ratio/low_mean": 0.000326658735843921,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007299537505787157,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3044.0,
      "completions/mean_length": 623.310302734375,
      "completions/mean_terminated_length": 564.1838989257812,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 11.718576844561097,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 730079109.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.21421831846237183,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 1254
    },
    {
      "clip_ratio/high_max": 0.0017146104437415488,
      "clip_ratio/high_mean": 0.0005236497797795892,
      "clip_ratio/low_mean": 0.0003831217946981269,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009067715918718022,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4043.0,
      "completions/mean_length": 661.4944458007812,
      "completions/mean_terminated_length": 587.086669921875,
      "completions/min_length": 57.0,
      "completions/min_terminated_length": 57.0,
      "epoch": 11.727909011373578,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0179,
      "num_tokens": 730689096.0,
      "reward": 0.551339328289032,
      "reward_std": 0.23619845509529114,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 1255
    },
    {
      "clip_ratio/high_max": 0.0017623059266043128,
      "clip_ratio/high_mean": 0.0005050417953498254,
      "clip_ratio/low_mean": 0.00023900017436062626,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007440419644808571,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3739.0,
      "completions/mean_length": 624.341552734375,
      "completions/mean_terminated_length": 557.1990966796875,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 11.73724117818606,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": 0.0003,
      "num_tokens": 731270906.0,
      "reward": 0.582589328289032,
      "reward_std": 0.16728995740413666,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.4934072494506836,
      "step": 1256
    },
    {
      "clip_ratio/high_max": 0.0017931064594449708,
      "clip_ratio/high_mean": 0.0005195500677928067,
      "clip_ratio/low_mean": 0.0002956146066708243,
      "clip_ratio/low_min": 1.3975849469716195e-05,
      "clip_ratio/region_mean": 0.0008151646707119653,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2304.0,
      "completions/mean_length": 604.9799194335938,
      "completions/mean_terminated_length": 565.577880859375,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 11.746573344998541,
      "grad_norm": 0.1171875,
      "learning_rate": 1e-06,
      "loss": -0.0029,
      "num_tokens": 731862856.0,
      "reward": 0.546875,
      "reward_std": 0.19223180413246155,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 1257
    },
    {
      "clip_ratio/high_max": 0.0016030209390009986,
      "clip_ratio/high_mean": 0.0004903168048713269,
      "clip_ratio/low_mean": 0.00039110172519940534,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008814185353003268,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3955.0,
      "completions/mean_length": 649.2545166015625,
      "completions/mean_terminated_length": 550.32373046875,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 11.755905511811024,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": 0.0079,
      "num_tokens": 732437124.0,
      "reward": 0.4765625298023224,
      "reward_std": 0.23281168937683105,
      "rewards/verify_math_reward/mean": 0.4765625,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 1258
    },
    {
      "clip_ratio/high_max": 0.0018444312427163823,
      "clip_ratio/high_mean": 0.0006008650298099383,
      "clip_ratio/low_mean": 0.00036294881215326313,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009638138421905751,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2702.0,
      "completions/mean_length": 654.8828125,
      "completions/mean_terminated_length": 556.1136474609375,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 11.765237678623505,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0028,
      "num_tokens": 733012931.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.22263678908348083,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756911277771,
      "step": 1259
    },
    {
      "clip_ratio/high_max": 0.0015400391521325218,
      "clip_ratio/high_mean": 0.00044522428856907936,
      "clip_ratio/low_mean": 0.00026891370066550735,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007141379853692342,
      "completions/clipped_ratio": 0.0033482142857143016,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2768.0,
      "completions/mean_length": 538.982177734375,
      "completions/mean_terminated_length": 527.032470703125,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 11.774569845435988,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0065,
      "num_tokens": 733567811.0,
      "reward": 0.609375,
      "reward_std": 0.18840177357196808,
      "rewards/verify_math_reward/mean": 0.609375,
      "rewards/verify_math_reward/std": 0.48816296458244324,
      "step": 1260
    },
    {
      "clip_ratio/high_max": 0.0014853214934191783,
      "clip_ratio/high_mean": 0.0004668172549600058,
      "clip_ratio/low_mean": 0.0003691930512559338,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008360102988262952,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3929.0,
      "completions/mean_length": 563.6986694335938,
      "completions/mean_terminated_length": 519.7943725585938,
      "completions/min_length": 84.0,
      "completions/min_terminated_length": 84.0,
      "epoch": 11.783902012248468,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0162,
      "num_tokens": 734110885.0,
      "reward": 0.598214328289032,
      "reward_std": 0.19276243448257446,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053287506103516,
      "step": 1261
    },
    {
      "clip_ratio/high_max": 0.0015220143450278556,
      "clip_ratio/high_mean": 0.0004234905613884621,
      "clip_ratio/low_mean": 0.00032890618831515894,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007523967351517058,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3383.0,
      "completions/mean_length": 596.0892944335938,
      "completions/mean_terminated_length": 536.4994506835938,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 11.793234179060951,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0105,
      "num_tokens": 734669845.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.1965906322002411,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 1262
    },
    {
      "clip_ratio/high_max": 0.0017434260353184072,
      "clip_ratio/high_mean": 0.00047081311981855833,
      "clip_ratio/low_mean": 0.00035179688006792276,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008226099962485023,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3485.0,
      "completions/mean_length": 663.1283569335938,
      "completions/mean_terminated_length": 592.7506103515625,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "epoch": 11.802566345873432,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": 0.0008,
      "num_tokens": 735276000.0,
      "reward": 0.5167410969734192,
      "reward_std": 0.20880597829818726,
      "rewards/verify_math_reward/mean": 0.5167410969734192,
      "rewards/verify_math_reward/std": 0.4999987483024597,
      "step": 1263
    },
    {
      "clip_ratio/high_max": 0.0015636055650247727,
      "clip_ratio/high_mean": 0.00051671839150913,
      "clip_ratio/low_mean": 0.0003458441292423231,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008625625368949841,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2872.0,
      "completions/mean_length": 615.0592041015625,
      "completions/mean_terminated_length": 563.8108520507812,
      "completions/min_length": 75.0,
      "completions/min_terminated_length": 75.0,
      "epoch": 11.811898512685914,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0175,
      "num_tokens": 735860925.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.24029332399368286,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 1264
    },
    {
      "clip_ratio/high_max": 0.0014206711366568925,
      "clip_ratio/high_mean": 0.0003940347921798093,
      "clip_ratio/low_mean": 0.00027018483649499103,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00066421962401364,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3907.0,
      "completions/mean_length": 659.5256958007812,
      "completions/mean_terminated_length": 593.063720703125,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 11.821230679498395,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.002,
      "num_tokens": 736470892.0,
      "reward": 0.559151828289032,
      "reward_std": 0.19114412367343903,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 1265
    },
    {
      "clip_ratio/high_max": 0.0014103773528404417,
      "clip_ratio/high_mean": 0.00042180658181223407,
      "clip_ratio/low_mean": 0.0004090827937943686,
      "clip_ratio/low_min": 1.2098334991605952e-05,
      "clip_ratio/region_mean": 0.0008308893839057419,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3368.0,
      "completions/mean_length": 632.0926513671875,
      "completions/mean_terminated_length": 581.0950927734375,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 11.830562846310878,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0104,
      "num_tokens": 737067039.0,
      "reward": 0.5,
      "reward_std": 0.22218288481235504,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5002792477607727,
      "step": 1266
    },
    {
      "clip_ratio/high_max": 0.0014555100688085076,
      "clip_ratio/high_mean": 0.0004522725402011929,
      "clip_ratio/low_mean": 0.0003380375752612963,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007903101081865316,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3823.0,
      "completions/mean_length": 610.6652221679688,
      "completions/mean_terminated_length": 559.3521728515625,
      "completions/min_length": 99.0,
      "completions/min_terminated_length": 99.0,
      "epoch": 11.83989501312336,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": -0.0003,
      "num_tokens": 737653339.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.202602818608284,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 1267
    },
    {
      "clip_ratio/high_max": 0.001670739199653326,
      "clip_ratio/high_mean": 0.0004310596360710406,
      "clip_ratio/low_mean": 0.00029482788818313566,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007258875193656422,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3527.0,
      "completions/mean_length": 591.5324096679688,
      "completions/mean_terminated_length": 531.8649291992188,
      "completions/min_length": 93.0,
      "completions/min_terminated_length": 93.0,
      "epoch": 11.849227179935841,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0006,
      "num_tokens": 738224960.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.18261782824993134,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.49828118085861206,
      "step": 1268
    },
    {
      "clip_ratio/high_max": 0.0018886373818531865,
      "clip_ratio/high_mean": 0.0006127497381385183,
      "clip_ratio/low_mean": 0.0003710556979967805,
      "clip_ratio/low_min": 1.0192433364863973e-05,
      "clip_ratio/region_mean": 0.0009838054302235832,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3482.0,
      "completions/mean_length": 588.2098388671875,
      "completions/mean_terminated_length": 532.5306396484375,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 11.858559346748324,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": -0.0106,
      "num_tokens": 738782620.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.23953697085380554,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 1269
    },
    {
      "clip_ratio/high_max": 0.001498469925536483,
      "clip_ratio/high_mean": 0.00044745506534127344,
      "clip_ratio/low_mean": 0.0004049732694966224,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008524283375663799,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3885.0,
      "completions/mean_length": 602.6975708007812,
      "completions/mean_terminated_length": 539.1829223632812,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 11.867891513560805,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0055,
      "num_tokens": 739348637.0,
      "reward": 0.5770089626312256,
      "reward_std": 0.21538014709949493,
      "rewards/verify_math_reward/mean": 0.5770089030265808,
      "rewards/verify_math_reward/std": 0.4943099319934845,
      "step": 1270
    },
    {
      "clip_ratio/high_max": 0.0013045662499280297,
      "clip_ratio/high_mean": 0.0003715952520906285,
      "clip_ratio/low_mean": 0.0002883205274883949,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006599157804885181,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2819.0,
      "completions/mean_length": 599.9955444335938,
      "completions/mean_terminated_length": 536.4318237304688,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 11.877223680373287,
      "grad_norm": 0.1162109375,
      "learning_rate": 1e-06,
      "loss": 0.0115,
      "num_tokens": 739917097.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.18438448011875153,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 1271
    },
    {
      "clip_ratio/high_max": 0.001831248866437818,
      "clip_ratio/high_mean": 0.0005290896176575188,
      "clip_ratio/low_mean": 0.0003391159937109478,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008682056013640249,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3494.0,
      "completions/mean_length": 567.4453125,
      "completions/mean_terminated_length": 543.6572875976562,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 11.886555847185768,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0005,
      "num_tokens": 740496048.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.19197219610214233,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606892466545105,
      "step": 1272
    },
    {
      "clip_ratio/high_max": 0.0017426248832634883,
      "clip_ratio/high_mean": 0.0005477522827277426,
      "clip_ratio/low_mean": 0.0002948331142533789,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008425853984590503,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2761.0,
      "completions/mean_length": 639.9486694335938,
      "completions/mean_terminated_length": 573.1080322265625,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 11.89588801399825,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": -0.0177,
      "num_tokens": 741088490.0,
      "reward": 0.566964328289032,
      "reward_std": 0.19956304132938385,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 1273
    },
    {
      "clip_ratio/high_max": 0.0013651914941874566,
      "clip_ratio/high_mean": 0.0004101007530152856,
      "clip_ratio/low_mean": 0.0003461956030150759,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007562963492091512,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3307.0,
      "completions/mean_length": 638.1551513671875,
      "completions/mean_terminated_length": 603.0698852539062,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 11.905220180810732,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": 0.0095,
      "num_tokens": 741712741.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.19681887328624725,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.4999600946903229,
      "step": 1274
    },
    {
      "clip_ratio/high_max": 0.001752853102516383,
      "clip_ratio/high_mean": 0.0004983964163329802,
      "clip_ratio/low_mean": 0.00033527855714510224,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008336749533555121,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2770.0,
      "completions/mean_length": 657.325927734375,
      "completions/mean_terminated_length": 606.6998901367188,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 11.914552347623214,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.0131,
      "num_tokens": 742339633.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.2151198536157608,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 1275
    },
    {
      "clip_ratio/high_max": 0.0017284796349485987,
      "clip_ratio/high_mean": 0.0005174747725504858,
      "clip_ratio/low_mean": 0.0004058257002270693,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009233004866473493,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3381.0,
      "completions/mean_length": 616.536865234375,
      "completions/mean_terminated_length": 577.2652587890625,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 11.923884514435695,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0022,
      "num_tokens": 742942770.0,
      "reward": 0.4977678656578064,
      "reward_std": 0.2377803474664688,
      "rewards/verify_math_reward/mean": 0.4977678656578064,
      "rewards/verify_math_reward/std": 0.5002743005752563,
      "step": 1276
    },
    {
      "clip_ratio/high_max": 0.001540876210128772,
      "clip_ratio/high_mean": 0.00041743938800209435,
      "clip_ratio/low_mean": 0.0003925449013877369,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008099842816591263,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2496.0,
      "completions/mean_length": 594.8828125,
      "completions/mean_terminated_length": 563.3412475585938,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 11.933216681248178,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.01,
      "num_tokens": 743533489.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.21436099708080292,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 1277
    },
    {
      "clip_ratio/high_max": 0.001457372645745636,
      "clip_ratio/high_mean": 0.00048528808770242904,
      "clip_ratio/low_mean": 0.0004092531180504011,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008945412091634353,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3916.0,
      "completions/mean_length": 647.7600708007812,
      "completions/mean_terminated_length": 581.0704956054688,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 11.942548848060659,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0072,
      "num_tokens": 744128266.0,
      "reward": 0.5770089626312256,
      "reward_std": 0.2130589634180069,
      "rewards/verify_math_reward/mean": 0.5770089030265808,
      "rewards/verify_math_reward/std": 0.4943099319934845,
      "step": 1278
    },
    {
      "clip_ratio/high_max": 0.0012613602284545777,
      "clip_ratio/high_mean": 0.00037037625725133694,
      "clip_ratio/low_mean": 0.000281501633935477,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006518779018733767,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2290.0,
      "completions/mean_length": 628.4553833007812,
      "completions/mean_terminated_length": 573.4149780273438,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 11.951881014873141,
      "grad_norm": 0.11865234375,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 744715634.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.18283648788928986,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 1279
    },
    {
      "clip_ratio/high_max": 0.0018274919157192926,
      "clip_ratio/high_mean": 0.0005502146226490368,
      "clip_ratio/low_mean": 0.00035680173800756165,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009070163514479646,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3529.0,
      "completions/mean_length": 551.4096069335938,
      "completions/mean_terminated_length": 515.4441528320312,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "epoch": 11.961213181685622,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": 0.0034,
      "num_tokens": 745266617.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.24633900821208954,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321895837783813,
      "step": 1280
    },
    {
      "clip_ratio/high_max": 0.0017539462169224862,
      "clip_ratio/high_mean": 0.0006478564841927437,
      "clip_ratio/low_mean": 0.00044828502723248675,
      "clip_ratio/low_min": 1.151861397374887e-05,
      "clip_ratio/region_mean": 0.0010961415009660413,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3247.0,
      "completions/mean_length": 575.724365234375,
      "completions/mean_terminated_length": 544.0101318359375,
      "completions/min_length": 87.0,
      "completions/min_terminated_length": 87.0,
      "epoch": 11.970545348498105,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": 0.0009,
      "num_tokens": 745841866.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.25254228711128235,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321892857551575,
      "step": 1281
    },
    {
      "clip_ratio/high_max": 0.0018160486351916916,
      "clip_ratio/high_mean": 0.000605407185275908,
      "clip_ratio/low_mean": 0.0003585709459912323,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000963978141953703,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2837.0,
      "completions/mean_length": 601.1495971679688,
      "completions/mean_terminated_length": 569.6644287109375,
      "completions/min_length": 85.0,
      "completions/min_terminated_length": 85.0,
      "epoch": 11.979877515310585,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0011,
      "num_tokens": 746441256.0,
      "reward": 0.559151828289032,
      "reward_std": 0.23684489727020264,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 1282
    },
    {
      "clip_ratio/high_max": 0.001632143199458369,
      "clip_ratio/high_mean": 0.0004890562947821309,
      "clip_ratio/low_mean": 0.0003405987251881015,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008296550331579056,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2619.0,
      "completions/mean_length": 654.8817138671875,
      "completions/mean_terminated_length": 576.3173217773438,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 11.989209682123068,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": 0.0027,
      "num_tokens": 747039302.0,
      "reward": 0.546875,
      "reward_std": 0.2061375081539154,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 1283
    },
    {
      "clip_ratio/high_max": 0.0014544960076818825,
      "clip_ratio/high_mean": 0.0004236216881281507,
      "clip_ratio/low_mean": 0.00033478742693660024,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000758409119953285,
      "completions/clipped_ratio": 0.014204545454545414,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2703.0,
      "completions/mean_length": 622.5767211914062,
      "completions/mean_terminated_length": 572.52734375,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 11.998541848935549,
      "grad_norm": 0.11767578125,
      "learning_rate": 1e-06,
      "loss": 0.0118,
      "num_tokens": 747607454.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.1876882165670395,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 1284
    },
    {
      "clip_ratio/high_max": 0.002111249454173958,
      "clip_ratio/high_mean": 0.0005780745918855246,
      "clip_ratio/low_mean": 0.0002785197034427256,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000856594292599766,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3269.0,
      "completions/mean_length": 585.9765625,
      "completions/mean_terminated_length": 522.157958984375,
      "completions/min_length": 44.0,
      "completions/min_terminated_length": 44.0,
      "epoch": 12.009332166812483,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": -0.0036,
      "num_tokens": 748154449.0,
      "reward": 0.5379464626312256,
      "reward_std": 0.19122152030467987,
      "rewards/verify_math_reward/mean": 0.5379464030265808,
      "rewards/verify_math_reward/std": 0.4988364577293396,
      "step": 1285
    },
    {
      "clip_ratio/high_max": 0.0018276780647283886,
      "clip_ratio/high_mean": 0.0005416600556600315,
      "clip_ratio/low_mean": 0.0004022405148589314,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009439005571039161,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2381.0,
      "completions/mean_length": 549.6217041015625,
      "completions/mean_terminated_length": 525.7135009765625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 12.018664333624963,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0023,
      "num_tokens": 748710414.0,
      "reward": 0.559151828289032,
      "reward_std": 0.2138826996088028,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 1286
    },
    {
      "clip_ratio/high_max": 0.0016587318723395583,
      "clip_ratio/high_mean": 0.0004891278510967823,
      "clip_ratio/low_mean": 0.00039340282819466665,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008825306913422537,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2889.0,
      "completions/mean_length": 567.2589721679688,
      "completions/mean_terminated_length": 543.4696655273438,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 12.027996500437446,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.0112,
      "num_tokens": 749285470.0,
      "reward": 0.582589328289032,
      "reward_std": 0.208912655711174,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.4934072494506836,
      "step": 1287
    },
    {
      "clip_ratio/high_max": 0.0017563361643624376,
      "clip_ratio/high_mean": 0.000560080104833105,
      "clip_ratio/low_mean": 0.00037574479779323156,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009358249071738101,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3690.0,
      "completions/mean_length": 595.8671875,
      "completions/mean_terminated_length": 556.3623046875,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 12.037328667249927,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.008,
      "num_tokens": 749866727.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.2463369071483612,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 1288
    },
    {
      "clip_ratio/high_max": 0.0015858357965043979,
      "clip_ratio/high_mean": 0.0004953791228672344,
      "clip_ratio/low_mean": 0.0003331395057557529,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008285186449938919,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2684.0,
      "completions/mean_length": 642.193115234375,
      "completions/mean_terminated_length": 547.1341552734375,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 12.04666083406241,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0013,
      "num_tokens": 750421764.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.22713902592658997,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 1289
    },
    {
      "clip_ratio/high_max": 0.0016000282575987512,
      "clip_ratio/high_mean": 0.0004811051303477143,
      "clip_ratio/low_mean": 0.00029532301141443895,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007764281344861956,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3579.0,
      "completions/mean_length": 601.4788208007812,
      "completions/mean_terminated_length": 558.0440673828125,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 12.05599300087489,
      "grad_norm": 0.11865234375,
      "learning_rate": 1e-06,
      "loss": 0.0007,
      "num_tokens": 751002225.0,
      "reward": 0.5625,
      "reward_std": 0.21350222826004028,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 1290
    },
    {
      "clip_ratio/high_max": 0.0014871957937430125,
      "clip_ratio/high_mean": 0.00042845377947742236,
      "clip_ratio/low_mean": 0.0002470025140155485,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006754562973583234,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3640.0,
      "completions/mean_length": 605.4420166015625,
      "completions/mean_terminated_length": 550.0363159179688,
      "completions/min_length": 67.0,
      "completions/min_terminated_length": 67.0,
      "epoch": 12.065325167687373,
      "grad_norm": 0.1103515625,
      "learning_rate": 1e-06,
      "loss": -0.0068,
      "num_tokens": 751577965.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.18039585649967194,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 1291
    },
    {
      "clip_ratio/high_max": 0.0017766085093171569,
      "clip_ratio/high_mean": 0.0006102052429923788,
      "clip_ratio/low_mean": 0.00036779468780423485,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009779999272723217,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3753.0,
      "completions/mean_length": 617.357177734375,
      "completions/mean_terminated_length": 570.1357421875,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 12.074657334499854,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0096,
      "num_tokens": 752176501.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.23149938881397247,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 1292
    },
    {
      "clip_ratio/high_max": 0.0015194485486063058,
      "clip_ratio/high_mean": 0.0004642161998162919,
      "clip_ratio/low_mean": 0.0003009372112501296,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007651534215256106,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2347.0,
      "completions/mean_length": 644.2957763671875,
      "completions/mean_terminated_length": 601.3932495117188,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 12.083989501312336,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.0115,
      "num_tokens": 752798990.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.19223181903362274,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.49828118085861206,
      "step": 1293
    },
    {
      "clip_ratio/high_max": 0.0015338030989369145,
      "clip_ratio/high_mean": 0.0004702502242253104,
      "clip_ratio/low_mean": 0.00039518083610801114,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008654310668134713,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3720.0,
      "completions/mean_length": 608.8203125,
      "completions/mean_terminated_length": 561.4830322265625,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 12.093321668124817,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.006,
      "num_tokens": 753389789.0,
      "reward": 0.5,
      "reward_std": 0.22748348116874695,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5002792477607727,
      "step": 1294
    },
    {
      "clip_ratio/high_max": 0.0016932627568166936,
      "clip_ratio/high_mean": 0.0005789731208096782,
      "clip_ratio/low_mean": 0.0003626756431458489,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009416487591806799,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3856.0,
      "completions/mean_length": 647.4475708007812,
      "completions/mean_terminated_length": 608.5248413085938,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 12.1026538349373,
      "grad_norm": 0.150390625,
      "learning_rate": 1e-06,
      "loss": 0.0029,
      "num_tokens": 754021646.0,
      "reward": 0.5546875,
      "reward_std": 0.26287147402763367,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 1295
    },
    {
      "clip_ratio/high_max": 0.002040789042439428,
      "clip_ratio/high_mean": 0.0006191360489538056,
      "clip_ratio/low_mean": 0.0003115602083880731,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009306962592745549,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3324.0,
      "completions/mean_length": 596.380615234375,
      "completions/mean_terminated_length": 544.8572998046875,
      "completions/min_length": 78.0,
      "completions/min_terminated_length": 78.0,
      "epoch": 12.11198600174978,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0018,
      "num_tokens": 754585835.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.2160520702600479,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763264656067,
      "step": 1296
    },
    {
      "clip_ratio/high_max": 0.001216744318298879,
      "clip_ratio/high_mean": 0.00034977001041625044,
      "clip_ratio/low_mean": 0.0003800091785706172,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007297791808014154,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3509.0,
      "completions/mean_length": 598.8895263671875,
      "completions/mean_terminated_length": 559.4187622070312,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 12.121318168562263,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.003,
      "num_tokens": 755167584.0,
      "reward": 0.5379464626312256,
      "reward_std": 0.22683270275592804,
      "rewards/verify_math_reward/mean": 0.5379464030265808,
      "rewards/verify_math_reward/std": 0.4988364577293396,
      "step": 1297
    },
    {
      "clip_ratio/high_max": 0.001482685913288151,
      "clip_ratio/high_mean": 0.0004471595132145012,
      "clip_ratio/low_mean": 0.0003107851521235716,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007579446782983723,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2543.0,
      "completions/mean_length": 651.341552734375,
      "completions/mean_terminated_length": 580.7221069335938,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 12.130650335374744,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0093,
      "num_tokens": 755770994.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.19340254366397858,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 1298
    },
    {
      "clip_ratio/high_max": 0.0015878854601396597,
      "clip_ratio/high_mean": 0.00045524305164690304,
      "clip_ratio/low_mean": 0.0003832767840776796,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008385198325413512,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3743.0,
      "completions/mean_length": 604.0859375,
      "completions/mean_terminated_length": 544.6322631835938,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 12.139982502187227,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0019,
      "num_tokens": 756336135.0,
      "reward": 0.578125,
      "reward_std": 0.2112463265657425,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 1299
    },
    {
      "clip_ratio/high_max": 0.0016629375404590974,
      "clip_ratio/high_mean": 0.0004791853848473693,
      "clip_ratio/low_mean": 0.0003064827004664039,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007856680845179653,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3746.0,
      "completions/mean_length": 649.060302734375,
      "completions/mean_terminated_length": 606.2169799804688,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 12.149314668999708,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": -0.0057,
      "num_tokens": 756972629.0,
      "reward": 0.5078125,
      "reward_std": 0.1943693608045578,
      "rewards/verify_math_reward/mean": 0.5078125,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 1300
    },
    {
      "clip_ratio/high_max": 0.0019123325018881587,
      "clip_ratio/high_mean": 0.0005961817856814378,
      "clip_ratio/low_mean": 0.00030001878246821434,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008962005608736945,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3020.0,
      "completions/mean_length": 540.4152221679688,
      "completions/mean_terminated_length": 512.41845703125,
      "completions/min_length": 40.0,
      "completions/min_terminated_length": 40.0,
      "epoch": 12.15864683581219,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": -0.0005,
      "num_tokens": 757512697.0,
      "reward": 0.6283482313156128,
      "reward_std": 0.20339517295360565,
      "rewards/verify_math_reward/mean": 0.6283482313156128,
      "rewards/verify_math_reward/std": 0.4835159480571747,
      "step": 1301
    },
    {
      "clip_ratio/high_max": 0.0014530583339364966,
      "clip_ratio/high_mean": 0.0004249860510299186,
      "clip_ratio/low_mean": 0.00036164326274956693,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007866293171900907,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4011.0,
      "completions/mean_length": 637.5736694335938,
      "completions/mean_terminated_length": 574.6931762695312,
      "completions/min_length": 51.0,
      "completions/min_terminated_length": 51.0,
      "epoch": 12.167979002624673,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": -0.009,
      "num_tokens": 758108867.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.2099343240261078,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763264656067,
      "step": 1302
    },
    {
      "clip_ratio/high_max": 0.00189531694559264,
      "clip_ratio/high_mean": 0.0006254707104744739,
      "clip_ratio/low_mean": 0.0002795308180338907,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009050015250977594,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3845.0,
      "completions/mean_length": 591.786865234375,
      "completions/mean_terminated_length": 544.2183227539062,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 12.177311169437154,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0054,
      "num_tokens": 758672668.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.21684511005878448,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 1303
    },
    {
      "clip_ratio/high_max": 0.0017920883110491559,
      "clip_ratio/high_mean": 0.0005238584753897157,
      "clip_ratio/low_mean": 0.0002807907675332899,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008046492453104293,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3315.0,
      "completions/mean_length": 596.279052734375,
      "completions/mean_terminated_length": 528.5938110351562,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 12.186643336249636,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0074,
      "num_tokens": 759216982.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.1867866963148117,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.48765692114830017,
      "step": 1304
    },
    {
      "clip_ratio/high_max": 0.001743438082485227,
      "clip_ratio/high_mean": 0.00048671018521417864,
      "clip_ratio/low_mean": 0.0002289760147959896,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007156861984185525,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2576.0,
      "completions/mean_length": 597.5379638671875,
      "completions/mean_terminated_length": 577.90576171875,
      "completions/min_length": 47.0,
      "completions/min_terminated_length": 47.0,
      "epoch": 12.195975503062117,
      "grad_norm": 0.11572265625,
      "learning_rate": 1e-06,
      "loss": 0.0017,
      "num_tokens": 759822592.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.17652486264705658,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 1305
    },
    {
      "clip_ratio/high_max": 0.0018792225855577271,
      "clip_ratio/high_mean": 0.0005498812547557463,
      "clip_ratio/low_mean": 0.0003889644242462964,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009388456892338581,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3961.0,
      "completions/mean_length": 607.3058471679688,
      "completions/mean_terminated_length": 551.9296875,
      "completions/min_length": 78.0,
      "completions/min_terminated_length": 78.0,
      "epoch": 12.2053076698746,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0052,
      "num_tokens": 760395762.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.22710372507572174,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943602085113525,
      "step": 1306
    },
    {
      "clip_ratio/high_max": 0.001504525847849436,
      "clip_ratio/high_mean": 0.0004240454372848035,
      "clip_ratio/low_mean": 0.0002640022747755211,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006880477149024955,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3016.0,
      "completions/mean_length": 621.7600708007812,
      "completions/mean_terminated_length": 574.5984497070312,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 12.21463983668708,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": 0.0016,
      "num_tokens": 760992355.0,
      "reward": 0.566964328289032,
      "reward_std": 0.2045213133096695,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 1307
    },
    {
      "clip_ratio/high_max": 0.0015156005474636913,
      "clip_ratio/high_mean": 0.00044573200568720495,
      "clip_ratio/low_mean": 0.00032972581243484456,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007754578136882628,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3881.0,
      "completions/mean_length": 675.0022583007812,
      "completions/mean_terminated_length": 588.89013671875,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 12.223972003499563,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": 0.0071,
      "num_tokens": 761592933.0,
      "reward": 0.543526828289032,
      "reward_std": 0.19227346777915955,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 1308
    },
    {
      "clip_ratio/high_max": 0.0014195018447935581,
      "clip_ratio/high_mean": 0.0003841636016659322,
      "clip_ratio/low_mean": 0.00029450061424540763,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006786642152292188,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3192.0,
      "completions/mean_length": 670.6998291015625,
      "completions/mean_terminated_length": 564.2750244140625,
      "completions/min_length": 86.0,
      "completions/min_terminated_length": 86.0,
      "epoch": 12.233304170312044,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": -0.0166,
      "num_tokens": 762170096.0,
      "reward": 0.5167410969734192,
      "reward_std": 0.18850985169410706,
      "rewards/verify_math_reward/mean": 0.5167410969734192,
      "rewards/verify_math_reward/std": 0.4999987483024597,
      "step": 1309
    },
    {
      "clip_ratio/high_max": 0.0015460189688383252,
      "clip_ratio/high_mean": 0.00041675835473142797,
      "clip_ratio/low_mean": 0.0002649051477874309,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006816635018367379,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2552.0,
      "completions/mean_length": 591.8449096679688,
      "completions/mean_terminated_length": 548.2904052734375,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 12.242636337124527,
      "grad_norm": 0.10986328125,
      "learning_rate": 1e-06,
      "loss": -0.0024,
      "num_tokens": 762742605.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.17934276163578033,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 1310
    },
    {
      "clip_ratio/high_max": 0.0015403651232190896,
      "clip_ratio/high_mean": 0.00041163716650771676,
      "clip_ratio/low_mean": 0.0003618612611262506,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007734984287708357,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3269.0,
      "completions/mean_length": 596.2902221679688,
      "completions/mean_terminated_length": 536.7037963867188,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 12.251968503937007,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0037,
      "num_tokens": 763295785.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.20305564999580383,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 1311
    },
    {
      "clip_ratio/high_max": 0.0014359627748490311,
      "clip_ratio/high_mean": 0.00041973795032390626,
      "clip_ratio/low_mean": 0.0003717834165399836,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007915213773230789,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3662.0,
      "completions/mean_length": 627.7835083007812,
      "completions/mean_terminated_length": 572.732421875,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 12.26130067074949,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": -0.0047,
      "num_tokens": 763893279.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.20366504788398743,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 1312
    },
    {
      "clip_ratio/high_max": 0.0015365403196483385,
      "clip_ratio/high_mean": 0.0004191620234905713,
      "clip_ratio/low_mean": 0.00035687019101260375,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007760322178000933,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3612.0,
      "completions/mean_length": 651.7444458007812,
      "completions/mean_terminated_length": 597.07373046875,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 12.27063283756197,
      "grad_norm": 0.11572265625,
      "learning_rate": 1e-06,
      "loss": 0.0027,
      "num_tokens": 764505522.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.1982831358909607,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 1313
    },
    {
      "clip_ratio/high_max": 0.001468470062718552,
      "clip_ratio/high_mean": 0.0004182748273251491,
      "clip_ratio/low_mean": 0.0003367118406458758,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007549866759291035,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3434.0,
      "completions/mean_length": 565.216552734375,
      "completions/mean_terminated_length": 533.4076538085938,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 12.279965004374453,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0198,
      "num_tokens": 765070220.0,
      "reward": 0.5859375,
      "reward_std": 0.2056485265493393,
      "rewards/verify_math_reward/mean": 0.5859375,
      "rewards/verify_math_reward/std": 0.4928344786167145,
      "step": 1314
    },
    {
      "clip_ratio/high_max": 0.0018188020530942595,
      "clip_ratio/high_mean": 0.0005683027397935803,
      "clip_ratio/low_mean": 0.00036719275317409483,
      "clip_ratio/low_min": 1.0539629329286981e-05,
      "clip_ratio/region_mean": 0.0009354955009257537,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2320.0,
      "completions/mean_length": 541.7991333007812,
      "completions/mean_terminated_length": 505.7361755371094,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 12.289297171186934,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0132,
      "num_tokens": 765599024.0,
      "reward": 0.625,
      "reward_std": 0.21225804090499878,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.48439329862594604,
      "step": 1315
    },
    {
      "clip_ratio/high_max": 0.0017290630557909026,
      "clip_ratio/high_mean": 0.0005711032329145382,
      "clip_ratio/low_mean": 0.0003347759829921415,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009058792084033485,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2950.0,
      "completions/mean_length": 598.5971069335938,
      "completions/mean_terminated_length": 555.1265869140625,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 12.298629337999417,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0126,
      "num_tokens": 766182983.0,
      "reward": 0.535714328289032,
      "reward_std": 0.2175600677728653,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 1316
    },
    {
      "clip_ratio/high_max": 0.001851984688983066,
      "clip_ratio/high_mean": 0.0005933641227784392,
      "clip_ratio/low_mean": 0.00034520033500484715,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00093856447347207,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3424.0,
      "completions/mean_length": 598.552490234375,
      "completions/mean_terminated_length": 559.077880859375,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 12.307961504811898,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.005,
      "num_tokens": 766765470.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.20929424464702606,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791128396987915,
      "step": 1317
    },
    {
      "clip_ratio/high_max": 0.0013350010449357796,
      "clip_ratio/high_mean": 0.0002992231101188736,
      "clip_ratio/low_mean": 0.00033686329379634117,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006360864131238486,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2309.0,
      "completions/mean_length": 677.078125,
      "completions/mean_terminated_length": 595.0239868164062,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 12.31729367162438,
      "grad_norm": 0.10595703125,
      "learning_rate": 1e-06,
      "loss": -0.002,
      "num_tokens": 767385668.0,
      "reward": 0.520089328289032,
      "reward_std": 0.16608785092830658,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 1318
    },
    {
      "clip_ratio/high_max": 0.0017508927767266869,
      "clip_ratio/high_mean": 0.00045783716450387146,
      "clip_ratio/low_mean": 0.00028557491077663144,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007434120889229234,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3778.0,
      "completions/mean_length": 575.4230346679688,
      "completions/mean_terminated_length": 523.5911254882812,
      "completions/min_length": 6.0,
      "completions/min_terminated_length": 6.0,
      "epoch": 12.326625838436861,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": -0.0063,
      "num_tokens": 767931287.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.18701490759849548,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 1319
    },
    {
      "clip_ratio/high_max": 0.0017550511274748715,
      "clip_ratio/high_mean": 0.0005480323541178223,
      "clip_ratio/low_mean": 0.0002626408290780091,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008106731884254259,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4045.0,
      "completions/mean_length": 669.03125,
      "completions/mean_terminated_length": 590.7899169921875,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 12.335958005249344,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0107,
      "num_tokens": 768535219.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.2036636620759964,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 1320
    },
    {
      "clip_ratio/high_max": 0.0014951197581467568,
      "clip_ratio/high_mean": 0.0004573226940465247,
      "clip_ratio/low_mean": 0.0003901700511050876,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008474927590214065,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2326.0,
      "completions/mean_length": 647.1027221679688,
      "completions/mean_terminated_length": 584.3954467773438,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 12.345290172061826,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0042,
      "num_tokens": 769134111.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.1962229609489441,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 1321
    },
    {
      "clip_ratio/high_max": 0.0010565303255134495,
      "clip_ratio/high_mean": 0.0002735761662506775,
      "clip_ratio/low_mean": 0.00023922488844618783,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0005128010548105522,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3989.0,
      "completions/mean_length": 605.0569458007812,
      "completions/mean_terminated_length": 553.661376953125,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 12.354622338874307,
      "grad_norm": 0.10498046875,
      "learning_rate": 1e-06,
      "loss": 0.0051,
      "num_tokens": 769709802.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.1291857659816742,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 1322
    },
    {
      "clip_ratio/high_max": 0.001643934969251859,
      "clip_ratio/high_mean": 0.0005014874755033816,
      "clip_ratio/low_mean": 0.000300719930919513,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008022074189284467,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1879.0,
      "completions/mean_length": 608.5267944335938,
      "completions/mean_terminated_length": 549.1487426757812,
      "completions/min_length": 9.0,
      "completions/min_terminated_length": 9.0,
      "epoch": 12.36395450568679,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.003,
      "num_tokens": 770280882.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.200238436460495,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514806270599365,
      "step": 1323
    },
    {
      "clip_ratio/high_max": 0.0016534529640921392,
      "clip_ratio/high_mean": 0.00048265144096149015,
      "clip_ratio/low_mean": 0.0004150785985075345,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008977300421975087,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3974.0,
      "completions/mean_length": 650.7533569335938,
      "completions/mean_terminated_length": 588.1124877929688,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 12.37328667249927,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": -0.0025,
      "num_tokens": 770894517.0,
      "reward": 0.455357164144516,
      "reward_std": 0.22575047612190247,
      "rewards/verify_math_reward/mean": 0.4553571343421936,
      "rewards/verify_math_reward/std": 0.49828118085861206,
      "step": 1324
    },
    {
      "clip_ratio/high_max": 0.001632866355976148,
      "clip_ratio/high_mean": 0.0005132672069976252,
      "clip_ratio/low_mean": 0.00030067291129398654,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008139401115840883,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3435.0,
      "completions/mean_length": 663.083740234375,
      "completions/mean_terminated_length": 604.634521484375,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 12.382618839311753,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.0085,
      "num_tokens": 771512992.0,
      "reward": 0.5167410969734192,
      "reward_std": 0.21064084768295288,
      "rewards/verify_math_reward/mean": 0.5167410969734192,
      "rewards/verify_math_reward/std": 0.4999987483024597,
      "step": 1325
    },
    {
      "clip_ratio/high_max": 0.0015619659827734722,
      "clip_ratio/high_mean": 0.0004668235363283202,
      "clip_ratio/low_mean": 0.0002761273143505605,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007429508582390554,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3833.0,
      "completions/mean_length": 706.9688110351562,
      "completions/mean_terminated_length": 621.6613159179688,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 12.391951006124234,
      "grad_norm": 0.11572265625,
      "learning_rate": 1e-06,
      "loss": -0.0112,
      "num_tokens": 772149284.0,
      "reward": 0.4441964626312256,
      "reward_std": 0.19580857455730438,
      "rewards/verify_math_reward/mean": 0.4441964328289032,
      "rewards/verify_math_reward/std": 0.49715369939804077,
      "step": 1326
    },
    {
      "clip_ratio/high_max": 0.0016889770995476283,
      "clip_ratio/high_mean": 0.00048299741774826543,
      "clip_ratio/low_mean": 0.00036200213139636617,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000844999559376447,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2471.0,
      "completions/mean_length": 605.1038208007812,
      "completions/mean_terminated_length": 545.6674194335938,
      "completions/min_length": 179.0,
      "completions/min_terminated_length": 179.0,
      "epoch": 12.401283172936717,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": -0.0009,
      "num_tokens": 772708641.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.20741882920265198,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 1327
    },
    {
      "clip_ratio/high_max": 0.0019424109823376057,
      "clip_ratio/high_mean": 0.0006247407210366873,
      "clip_ratio/low_mean": 0.0004140146727422689,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010387554048065795,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2134.0,
      "completions/mean_length": 627.3158569335938,
      "completions/mean_terminated_length": 568.2576904296875,
      "completions/min_length": 42.0,
      "completions/min_terminated_length": 42.0,
      "epoch": 12.410615339749198,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0063,
      "num_tokens": 773298468.0,
      "reward": 0.5301339626312256,
      "reward_std": 0.2515665888786316,
      "rewards/verify_math_reward/mean": 0.5301339030265808,
      "rewards/verify_math_reward/std": 0.49936988949775696,
      "step": 1328
    },
    {
      "clip_ratio/high_max": 0.0016957903771981364,
      "clip_ratio/high_mean": 0.0005747592945226643,
      "clip_ratio/low_mean": 0.00045203657191450475,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010267958823533263,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1939.0,
      "completions/mean_length": 576.208740234375,
      "completions/mean_terminated_length": 520.3389892578125,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 12.41994750656168,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0319,
      "num_tokens": 773847591.0,
      "reward": 0.5970982313156128,
      "reward_std": 0.23916973173618317,
      "rewards/verify_math_reward/mean": 0.5970982313156128,
      "rewards/verify_math_reward/std": 0.49075525999069214,
      "step": 1329
    },
    {
      "clip_ratio/high_max": 0.0018559801901574247,
      "clip_ratio/high_mean": 0.0005546434127836619,
      "clip_ratio/low_mean": 0.0003098287588727544,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008644721729069715,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3005.0,
      "completions/mean_length": 586.9017944335938,
      "completions/mean_terminated_length": 559.2711181640625,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 12.429279673374161,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0048,
      "num_tokens": 774445887.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.1940765529870987,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796950817108154,
      "step": 1330
    },
    {
      "clip_ratio/high_max": 0.0017397524388798047,
      "clip_ratio/high_mean": 0.0005094935975193948,
      "clip_ratio/low_mean": 0.0003674017425510101,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008768953507569677,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2870.0,
      "completions/mean_length": 584.7377319335938,
      "completions/mean_terminated_length": 545.1072387695312,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 12.438611840186644,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.005,
      "num_tokens": 775016404.0,
      "reward": 0.598214328289032,
      "reward_std": 0.21775297820568085,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053290486335754,
      "step": 1331
    },
    {
      "clip_ratio/high_max": 0.0017374656772517483,
      "clip_ratio/high_mean": 0.0005541824407373497,
      "clip_ratio/low_mean": 0.00036899929591527325,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009231817352883809,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3193.0,
      "completions/mean_length": 568.6060791015625,
      "completions/mean_terminated_length": 528.79345703125,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 12.447944006999125,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0161,
      "num_tokens": 775570939.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.22567518055438995,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 1332
    },
    {
      "clip_ratio/high_max": 0.0012483817890824866,
      "clip_ratio/high_mean": 0.0003135813240078278,
      "clip_ratio/low_mean": 0.000292292923631976,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006058742455934407,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2985.0,
      "completions/mean_length": 644.6796875,
      "completions/mean_terminated_length": 581.9284057617188,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 12.457276173811607,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": -0.0201,
      "num_tokens": 776184036.0,
      "reward": 0.527901828289032,
      "reward_std": 0.17505809664726257,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949967861175537,
      "step": 1333
    },
    {
      "clip_ratio/high_max": 0.0018242450987600023,
      "clip_ratio/high_mean": 0.0005425435836059478,
      "clip_ratio/low_mean": 0.00029184862955844437,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008343922236235812,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3891.0,
      "completions/mean_length": 622.1373291015625,
      "completions/mean_terminated_length": 570.9931640625,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 12.466608340624088,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0013,
      "num_tokens": 776780287.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.21722166240215302,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 1334
    },
    {
      "clip_ratio/high_max": 0.0018869915465984377,
      "clip_ratio/high_mean": 0.000658529456359247,
      "clip_ratio/low_mean": 0.00034590689188007673,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010044363489214447,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3828.0,
      "completions/mean_length": 560.786865234375,
      "completions/mean_terminated_length": 524.9165649414062,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 12.47594050743657,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0077,
      "num_tokens": 777326504.0,
      "reward": 0.6227678656578064,
      "reward_std": 0.21989238262176514,
      "rewards/verify_math_reward/mean": 0.6227678656578064,
      "rewards/verify_math_reward/std": 0.4849644899368286,
      "step": 1335
    },
    {
      "clip_ratio/high_max": 0.0016766611024650047,
      "clip_ratio/high_mean": 0.0005130787046709884,
      "clip_ratio/low_mean": 0.0003538968317116087,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008669755347909813,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3928.0,
      "completions/mean_length": 583.9163208007812,
      "completions/mean_terminated_length": 548.2807006835938,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 12.485272674249051,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0071,
      "num_tokens": 777907597.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.22198784351348877,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 1336
    },
    {
      "clip_ratio/high_max": 0.0017860114867289667,
      "clip_ratio/high_mean": 0.0005190521185340913,
      "clip_ratio/low_mean": 0.00031431715660801274,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008333692812811933,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3945.0,
      "completions/mean_length": 600.2355346679688,
      "completions/mean_terminated_length": 536.6761474609375,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 12.494604841061534,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0052,
      "num_tokens": 778472224.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.20478203892707825,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 1337
    },
    {
      "clip_ratio/high_max": 0.0014998347751316032,
      "clip_ratio/high_mean": 0.00045739044674064644,
      "clip_ratio/low_mean": 0.00024417975680535164,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007015701978616562,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2504.0,
      "completions/mean_length": 614.5123291015625,
      "completions/mean_terminated_length": 571.2395629882812,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 12.503937007874015,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0027,
      "num_tokens": 779067547.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.1862974464893341,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 1338
    },
    {
      "clip_ratio/high_max": 0.0016813044057926163,
      "clip_ratio/high_mean": 0.00047154157800832763,
      "clip_ratio/low_mean": 0.0002729561776959599,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007444977500199457,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3681.0,
      "completions/mean_length": 668.7589721679688,
      "completions/mean_terminated_length": 606.4454345703125,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 12.513269174686497,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": 0.0055,
      "num_tokens": 779702187.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.19411791861057281,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 1339
    },
    {
      "clip_ratio/high_max": 0.0016666199371684343,
      "clip_ratio/high_mean": 0.0004731992985398392,
      "clip_ratio/low_mean": 0.00031938857523527986,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007925878726382507,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3986.0,
      "completions/mean_length": 649.583740234375,
      "completions/mean_terminated_length": 590.9046630859375,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 12.52260134149898,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": 0.0024,
      "num_tokens": 780312742.0,
      "reward": 0.559151828289032,
      "reward_std": 0.22195462882518768,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 1340
    },
    {
      "clip_ratio/high_max": 0.0019046869902012986,
      "clip_ratio/high_mean": 0.0005148489103703469,
      "clip_ratio/low_mean": 0.00033319990075142414,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008480487922497559,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2395.0,
      "completions/mean_length": 645.7879638671875,
      "completions/mean_terminated_length": 583.0568237304688,
      "completions/min_length": 74.0,
      "completions/min_terminated_length": 74.0,
      "epoch": 12.531933508311461,
      "grad_norm": 0.1162109375,
      "learning_rate": 1e-06,
      "loss": -0.0069,
      "num_tokens": 780909128.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.20162348449230194,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 1341
    },
    {
      "clip_ratio/high_max": 0.0018353088380536065,
      "clip_ratio/high_mean": 0.0006077824446037994,
      "clip_ratio/low_mean": 0.0003275464979424214,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009353289542559651,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4034.0,
      "completions/mean_length": 619.8694458007812,
      "completions/mean_terminated_length": 580.6354370117188,
      "completions/min_length": 71.0,
      "completions/min_terminated_length": 71.0,
      "epoch": 12.541265675123944,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0082,
      "num_tokens": 781512203.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.22485990822315216,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 1342
    },
    {
      "clip_ratio/high_max": 0.0015425090605276637,
      "clip_ratio/high_mean": 0.0005186793109714927,
      "clip_ratio/low_mean": 0.00037904443502156937,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000897723743946699,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3859.0,
      "completions/mean_length": 638.1194458007812,
      "completions/mean_terminated_length": 575.2488403320312,
      "completions/min_length": 82.0,
      "completions/min_terminated_length": 82.0,
      "epoch": 12.550597841936424,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0071,
      "num_tokens": 782100870.0,
      "reward": 0.535714328289032,
      "reward_std": 0.2338722199201584,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 1343
    },
    {
      "clip_ratio/high_max": 0.002033382579611498,
      "clip_ratio/high_mean": 0.0005803411145279824,
      "clip_ratio/low_mean": 0.00043659423477038217,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010169353408855386,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4093.0,
      "completions/mean_length": 594.0625,
      "completions/mean_terminated_length": 526.33447265625,
      "completions/min_length": 98.0,
      "completions/min_terminated_length": 98.0,
      "epoch": 12.559930008748907,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0019,
      "num_tokens": 782643982.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.23064091801643372,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 1344
    },
    {
      "clip_ratio/high_max": 0.0017592068570593256,
      "clip_ratio/high_mean": 0.0005086708604267187,
      "clip_ratio/low_mean": 0.00036662707270807005,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008752979128985316,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3057.0,
      "completions/mean_length": 620.708740234375,
      "completions/mean_terminated_length": 573.5328369140625,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 12.569262175561388,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0092,
      "num_tokens": 783244393.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.1980655938386917,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 1345
    },
    {
      "clip_ratio/high_max": 0.0017595783992874203,
      "clip_ratio/high_mean": 0.000571181706391144,
      "clip_ratio/low_mean": 0.00035188769788874197,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009230693999597861,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2412.0,
      "completions/mean_length": 585.169677734375,
      "completions/mean_terminated_length": 529.4421997070312,
      "completions/min_length": 84.0,
      "completions/min_terminated_length": 84.0,
      "epoch": 12.57859434237387,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0239,
      "num_tokens": 783805673.0,
      "reward": 0.6004464626312256,
      "reward_std": 0.20636393129825592,
      "rewards/verify_math_reward/mean": 0.6004464030265808,
      "rewards/verify_math_reward/std": 0.49008017778396606,
      "step": 1346
    },
    {
      "clip_ratio/high_max": 0.0018661543399502989,
      "clip_ratio/high_mean": 0.0005619721421226131,
      "clip_ratio/low_mean": 0.0003518954197261337,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009138675513895578,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2460.0,
      "completions/mean_length": 618.2890625,
      "completions/mean_terminated_length": 542.9452514648438,
      "completions/min_length": 8.0,
      "completions/min_terminated_length": 8.0,
      "epoch": 12.587926509186351,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0067,
      "num_tokens": 784372164.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.2003892958164215,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 1347
    },
    {
      "clip_ratio/high_max": 0.001620668293980998,
      "clip_ratio/high_mean": 0.0005225414563483355,
      "clip_ratio/low_mean": 0.000327016052210638,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008495574954849872,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2856.0,
      "completions/mean_length": 590.1239013671875,
      "completions/mean_terminated_length": 530.4324951171875,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 12.597258675998834,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.001,
      "num_tokens": 784927435.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.21184365451335907,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 1348
    },
    {
      "clip_ratio/high_max": 0.0013208903519625892,
      "clip_ratio/high_mean": 0.00040145785442291526,
      "clip_ratio/low_mean": 0.00038392885812754685,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007853867066387465,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3601.0,
      "completions/mean_length": 650.529052734375,
      "completions/mean_terminated_length": 591.8660888671875,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 12.606590842811315,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0141,
      "num_tokens": 785546997.0,
      "reward": 0.494419664144516,
      "reward_std": 0.19170935451984406,
      "rewards/verify_math_reward/mean": 0.4944196343421936,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 1349
    },
    {
      "clip_ratio/high_max": 0.0013343321516003925,
      "clip_ratio/high_mean": 0.0003621808348270861,
      "clip_ratio/low_mean": 0.00033473987821253104,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006969207147449197,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2803.0,
      "completions/mean_length": 586.4308471679688,
      "completions/mean_terminated_length": 526.676513671875,
      "completions/min_length": 69.0,
      "completions/min_terminated_length": 69.0,
      "epoch": 12.615923009623797,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 786104367.0,
      "reward": 0.535714328289032,
      "reward_std": 0.1892675906419754,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 1350
    },
    {
      "clip_ratio/high_max": 0.001797109693143284,
      "clip_ratio/high_mean": 0.0005520653926396335,
      "clip_ratio/low_mean": 0.00040413533145056135,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009562007217027713,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3401.0,
      "completions/mean_length": 617.1864013671875,
      "completions/mean_terminated_length": 565.9694213867188,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 12.625255176436278,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0055,
      "num_tokens": 786693614.0,
      "reward": 0.5390625,
      "reward_std": 0.25088509917259216,
      "rewards/verify_math_reward/mean": 0.5390625,
      "rewards/verify_math_reward/std": 0.4987502098083496,
      "step": 1351
    },
    {
      "clip_ratio/high_max": 0.0013580147542597842,
      "clip_ratio/high_mean": 0.0004347970145772706,
      "clip_ratio/low_mean": 0.0003087151912950503,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007435122161041363,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4048.0,
      "completions/mean_length": 642.2756958007812,
      "completions/mean_terminated_length": 579.4806518554688,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 12.63458734324876,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0011,
      "num_tokens": 787291469.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.20336057245731354,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 1352
    },
    {
      "clip_ratio/high_max": 0.0015516348994424334,
      "clip_ratio/high_mean": 0.0004743716763186967,
      "clip_ratio/low_mean": 0.0003102460071886526,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007846176763450785,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2980.0,
      "completions/mean_length": 634.2667846679688,
      "completions/mean_terminated_length": 551.1851196289062,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 12.643919510061242,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.004,
      "num_tokens": 787867788.0,
      "reward": 0.559151828289032,
      "reward_std": 0.20474812388420105,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 1353
    },
    {
      "clip_ratio/high_max": 0.002083352690533502,
      "clip_ratio/high_mean": 0.0006984349906815623,
      "clip_ratio/low_mean": 0.000275083659403208,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009735186304169474,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4035.0,
      "completions/mean_length": 526.3471069335938,
      "completions/mean_terminated_length": 486.0575866699219,
      "completions/min_length": 79.0,
      "completions/min_terminated_length": 79.0,
      "epoch": 12.653251676873724,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0046,
      "num_tokens": 788375771.0,
      "reward": 0.6328125,
      "reward_std": 0.20267769694328308,
      "rewards/verify_math_reward/mean": 0.6328125,
      "rewards/verify_math_reward/std": 0.48230743408203125,
      "step": 1354
    },
    {
      "clip_ratio/high_max": 0.0016434017079518526,
      "clip_ratio/high_mean": 0.0005675661604982452,
      "clip_ratio/low_mean": 0.00045137354595681245,
      "clip_ratio/low_min": 1.3174536434235051e-05,
      "clip_ratio/region_mean": 0.0010189397307840409,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3527.0,
      "completions/mean_length": 620.0625,
      "completions/mean_terminated_length": 568.8878784179688,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 12.662583843686207,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0013,
      "num_tokens": 788971259.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.2634360194206238,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 1355
    },
    {
      "clip_ratio/high_max": 0.0014816271814197535,
      "clip_ratio/high_mean": 0.0004082731221615177,
      "clip_ratio/low_mean": 0.0003605608969792229,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007688340228924062,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3368.0,
      "completions/mean_length": 647.2801513671875,
      "completions/mean_terminated_length": 576.5774536132812,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 12.671916010498688,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0105,
      "num_tokens": 789579166.0,
      "reward": 0.4921875298023224,
      "reward_std": 0.20080046355724335,
      "rewards/verify_math_reward/mean": 0.4921875,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 1356
    },
    {
      "clip_ratio/high_max": 0.0015836158358979446,
      "clip_ratio/high_mean": 0.00048607764722419233,
      "clip_ratio/low_mean": 0.0003926212223177572,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008786988810243201,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3176.0,
      "completions/mean_length": 638.0614013671875,
      "completions/mean_terminated_length": 579.1861572265625,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 12.68124817731117,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0118,
      "num_tokens": 790176213.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.20508696138858795,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 1357
    },
    {
      "clip_ratio/high_max": 0.0015654539438401116,
      "clip_ratio/high_mean": 0.0004823506724278559,
      "clip_ratio/low_mean": 0.0002884349729583846,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007707856429988169,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2358.0,
      "completions/mean_length": 556.9765625,
      "completions/mean_terminated_length": 517.03271484375,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 12.690580344123651,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": -0.0059,
      "num_tokens": 790720536.0,
      "reward": 0.6183035969734192,
      "reward_std": 0.18889032304286957,
      "rewards/verify_math_reward/mean": 0.6183035969734192,
      "rewards/verify_math_reward/std": 0.4860740303993225,
      "step": 1358
    },
    {
      "clip_ratio/high_max": 0.0018910308717750013,
      "clip_ratio/high_mean": 0.0006210268591075874,
      "clip_ratio/low_mean": 0.0004180689832082862,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010390958459538524,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2288.0,
      "completions/mean_length": 572.1317138671875,
      "completions/mean_terminated_length": 528.3322143554688,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 12.699912510936134,
      "grad_norm": 0.154296875,
      "learning_rate": 1e-06,
      "loss": 0.0077,
      "num_tokens": 791278166.0,
      "reward": 0.613839328289032,
      "reward_std": 0.25539591908454895,
      "rewards/verify_math_reward/mean": 0.6138392686843872,
      "rewards/verify_math_reward/std": 0.48714008927345276,
      "step": 1359
    },
    {
      "clip_ratio/high_max": 0.001492702578616445,
      "clip_ratio/high_mean": 0.00044245443530144257,
      "clip_ratio/low_mean": 0.0003830527193713351,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008255071525127278,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3763.0,
      "completions/mean_length": 619.552490234375,
      "completions/mean_terminated_length": 560.3621215820312,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 12.709244677748615,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0055,
      "num_tokens": 791855125.0,
      "reward": 0.5234375,
      "reward_std": 0.2184620350599289,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 1360
    },
    {
      "clip_ratio/high_max": 0.0016415607551607536,
      "clip_ratio/high_mean": 0.00048394609439128544,
      "clip_ratio/low_mean": 0.00030661542632515193,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00079056152026169,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3761.0,
      "completions/mean_length": 597.318115234375,
      "completions/mean_terminated_length": 545.80859375,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 12.718576844561097,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0088,
      "num_tokens": 792418370.0,
      "reward": 0.5334821939468384,
      "reward_std": 0.21673493087291718,
      "rewards/verify_math_reward/mean": 0.5334821343421936,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 1361
    },
    {
      "clip_ratio/high_max": 0.0012727250514217303,
      "clip_ratio/high_mean": 0.0003514046095460799,
      "clip_ratio/low_mean": 0.0003181941166303659,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006695987376588164,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4011.0,
      "completions/mean_length": 651.5770263671875,
      "completions/mean_terminated_length": 604.8201904296875,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 12.727909011373578,
      "grad_norm": 0.11376953125,
      "learning_rate": 1e-06,
      "loss": -0.0001,
      "num_tokens": 793044295.0,
      "reward": 0.4676339626312256,
      "reward_std": 0.18239323794841766,
      "rewards/verify_math_reward/mean": 0.4676339328289032,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 1362
    },
    {
      "clip_ratio/high_max": 0.001995025560972863,
      "clip_ratio/high_mean": 0.000556725545379777,
      "clip_ratio/low_mean": 0.00031709733048046473,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008738228671063553,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2530.0,
      "completions/mean_length": 674.005615234375,
      "completions/mean_terminated_length": 587.868408203125,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 12.73724117818606,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": -0.0069,
      "num_tokens": 793651412.0,
      "reward": 0.515625,
      "reward_std": 0.21899224817752838,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 1363
    },
    {
      "clip_ratio/high_max": 0.0013632476238853997,
      "clip_ratio/high_mean": 0.0004527963271812041,
      "clip_ratio/low_mean": 0.00034688679193095595,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007996831172931707,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3827.0,
      "completions/mean_length": 600.3683471679688,
      "completions/mean_terminated_length": 564.899658203125,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 12.746573344998541,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0019,
      "num_tokens": 794253750.0,
      "reward": 0.546875,
      "reward_std": 0.2136916220188141,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 1364
    },
    {
      "clip_ratio/high_max": 0.001735398400342092,
      "clip_ratio/high_mean": 0.00045994657079972967,
      "clip_ratio/low_mean": 0.00034610448278726835,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008060510581344715,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3983.0,
      "completions/mean_length": 566.786865234375,
      "completions/mean_terminated_length": 534.9921264648438,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 12.755905511811024,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0123,
      "num_tokens": 794808903.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.2222556322813034,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 1365
    },
    {
      "clip_ratio/high_max": 0.0014769204062758945,
      "clip_ratio/high_mean": 0.0003818334614607011,
      "clip_ratio/low_mean": 0.0002928974295173248,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006747308880221681,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3841.0,
      "completions/mean_length": 668.0178833007812,
      "completions/mean_terminated_length": 581.72998046875,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 12.765237678623505,
      "grad_norm": 0.11767578125,
      "learning_rate": 1e-06,
      "loss": 0.0149,
      "num_tokens": 795407791.0,
      "reward": 0.515625,
      "reward_std": 0.1898314207792282,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 1366
    },
    {
      "clip_ratio/high_max": 0.0018882580034187413,
      "clip_ratio/high_mean": 0.0005480299162172741,
      "clip_ratio/low_mean": 0.00035163842062502226,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000899668327292602,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4078.0,
      "completions/mean_length": 599.9944458007812,
      "completions/mean_terminated_length": 556.541259765625,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 12.774569845435988,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.0096,
      "num_tokens": 795986418.0,
      "reward": 0.5379464626312256,
      "reward_std": 0.21993333101272583,
      "rewards/verify_math_reward/mean": 0.5379464030265808,
      "rewards/verify_math_reward/std": 0.4988364577293396,
      "step": 1367
    },
    {
      "clip_ratio/high_max": 0.0015762244338475284,
      "clip_ratio/high_mean": 0.000493550815576782,
      "clip_ratio/low_mean": 0.0002903348273548545,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007838856463422417,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3824.0,
      "completions/mean_length": 601.7890625,
      "completions/mean_terminated_length": 538.2579345703125,
      "completions/min_length": 61.0,
      "completions/min_terminated_length": 61.0,
      "epoch": 12.783902012248468,
      "grad_norm": 0.1142578125,
      "learning_rate": 1e-06,
      "loss": -0.0018,
      "num_tokens": 796546229.0,
      "reward": 0.5859375,
      "reward_std": 0.17473429441452026,
      "rewards/verify_math_reward/mean": 0.5859375,
      "rewards/verify_math_reward/std": 0.4928344786167145,
      "step": 1368
    },
    {
      "clip_ratio/high_max": 0.0018076913638651604,
      "clip_ratio/high_mean": 0.0005719085979762895,
      "clip_ratio/low_mean": 0.00035960120885647484,
      "clip_ratio/low_min": 1.1192693818884436e-05,
      "clip_ratio/region_mean": 0.0009315098195656901,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3877.0,
      "completions/mean_length": 612.7098388671875,
      "completions/mean_terminated_length": 569.4146728515625,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 12.793234179060951,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0135,
      "num_tokens": 797151969.0,
      "reward": 0.566964328289032,
      "reward_std": 0.24408239126205444,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 1369
    },
    {
      "clip_ratio/high_max": 0.0018303513852515607,
      "clip_ratio/high_mean": 0.0005259707338609587,
      "clip_ratio/low_mean": 0.0003362928214301064,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008622635577921756,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4094.0,
      "completions/mean_length": 626.7924194335938,
      "completions/mean_terminated_length": 555.6697387695312,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 12.802566345873432,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.006,
      "num_tokens": 797730607.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.2282680869102478,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 1370
    },
    {
      "clip_ratio/high_max": 0.0014975834533288435,
      "clip_ratio/high_mean": 0.00044420376389098237,
      "clip_ratio/low_mean": 0.00037671682684958796,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000820920591650065,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3668.0,
      "completions/mean_length": 641.2567138671875,
      "completions/mean_terminated_length": 574.44140625,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 12.811898512685914,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.012,
      "num_tokens": 798338549.0,
      "reward": 0.4933035969734192,
      "reward_std": 0.19227276742458344,
      "rewards/verify_math_reward/mean": 0.4933035671710968,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 1371
    },
    {
      "clip_ratio/high_max": 0.001852706509453128,
      "clip_ratio/high_mean": 0.0005725582295781351,
      "clip_ratio/low_mean": 0.00041772364602365997,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009902818865157315,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3443.0,
      "completions/mean_length": 672.1942138671875,
      "completions/mean_terminated_length": 561.7488403320312,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 12.821230679498395,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.0011,
      "num_tokens": 798919547.0,
      "reward": 0.5033482313156128,
      "reward_std": 0.22038161754608154,
      "rewards/verify_math_reward/mean": 0.5033482313156128,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 1372
    },
    {
      "clip_ratio/high_max": 0.0016795955361885717,
      "clip_ratio/high_mean": 0.0005697418389445374,
      "clip_ratio/low_mean": 0.0003776335677230236,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009473754089412978,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1969.0,
      "completions/mean_length": 624.3092041015625,
      "completions/mean_terminated_length": 569.2029418945312,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "epoch": 12.830562846310878,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0172,
      "num_tokens": 799518296.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.21936173737049103,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652786254883,
      "step": 1373
    },
    {
      "clip_ratio/high_max": 0.0015312329906009836,
      "clip_ratio/high_mean": 0.00047515602091152687,
      "clip_ratio/low_mean": 0.00036321660013527435,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008383726217289222,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3892.0,
      "completions/mean_length": 603.40625,
      "completions/mean_terminated_length": 551.9863891601562,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "epoch": 12.83989501312336,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": -0.0017,
      "num_tokens": 800092716.0,
      "reward": 0.5926339626312256,
      "reward_std": 0.19892321527004242,
      "rewards/verify_math_reward/mean": 0.5926339030265808,
      "rewards/verify_math_reward/std": 0.49161848425865173,
      "step": 1374
    },
    {
      "clip_ratio/high_max": 0.0018305032135685906,
      "clip_ratio/high_mean": 0.0005841719332693174,
      "clip_ratio/low_mean": 0.00022793832374645717,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008121102459881513,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2963.0,
      "completions/mean_length": 569.2756958007812,
      "completions/mean_terminated_length": 521.401611328125,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 12.849227179935841,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0007,
      "num_tokens": 800645619.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.20110353827476501,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791125416755676,
      "step": 1375
    },
    {
      "clip_ratio/high_max": 0.0018039065389530151,
      "clip_ratio/high_mean": 0.0005342165708270841,
      "clip_ratio/low_mean": 0.00036941456937711337,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009036311448653578,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2808.0,
      "completions/mean_length": 594.372802734375,
      "completions/mean_terminated_length": 562.8265991210938,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 12.858559346748324,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0111,
      "num_tokens": 801233449.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.2362728863954544,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 1376
    },
    {
      "clip_ratio/high_max": 0.00152376087862649,
      "clip_ratio/high_mean": 0.000517280071562709,
      "clip_ratio/low_mean": 0.0004199211927016222,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009372012737003388,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2306.0,
      "completions/mean_length": 638.482177734375,
      "completions/mean_terminated_length": 579.6140747070312,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 12.867891513560805,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0111,
      "num_tokens": 801829073.0,
      "reward": 0.535714328289032,
      "reward_std": 0.22425049543380737,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.4990014135837555,
      "step": 1377
    },
    {
      "clip_ratio/high_max": 0.0013722723051614594,
      "clip_ratio/high_mean": 0.000370172837847349,
      "clip_ratio/low_mean": 0.0003418285873522109,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007120014261090546,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3839.0,
      "completions/mean_length": 688.7857666015625,
      "completions/mean_terminated_length": 610.9954223632812,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 12.877223680373287,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": 0.0028,
      "num_tokens": 802467361.0,
      "reward": 0.4799107313156128,
      "reward_std": 0.19035457074642181,
      "rewards/verify_math_reward/mean": 0.4799107015132904,
      "rewards/verify_math_reward/std": 0.4998752772808075,
      "step": 1378
    },
    {
      "clip_ratio/high_max": 0.0017724383887980366,
      "clip_ratio/high_mean": 0.0005452787702324713,
      "clip_ratio/low_mean": 0.00040946645935946435,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009547452236802201,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2504.0,
      "completions/mean_length": 598.6451416015625,
      "completions/mean_terminated_length": 563.158935546875,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 12.886555847185768,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0089,
      "num_tokens": 803063875.0,
      "reward": 0.5546875,
      "reward_std": 0.2230491042137146,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 1379
    },
    {
      "clip_ratio/high_max": 0.0019459667928458657,
      "clip_ratio/high_mean": 0.0006290923111009761,
      "clip_ratio/low_mean": 0.00042494892102240556,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010540412358750473,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3898.0,
      "completions/mean_length": 636.7533569335938,
      "completions/mean_terminated_length": 589.7952880859375,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 12.89588801399825,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0138,
      "num_tokens": 803671742.0,
      "reward": 0.5234375,
      "reward_std": 0.24822258949279785,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 1380
    },
    {
      "clip_ratio/high_max": 0.001682820825408271,
      "clip_ratio/high_mean": 0.0005148701404777967,
      "clip_ratio/low_mean": 0.0002758318174755914,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000790701953064854,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3577.0,
      "completions/mean_length": 646.4765625,
      "completions/mean_terminated_length": 595.6907958984375,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 12.905220180810732,
      "grad_norm": 0.111328125,
      "learning_rate": 1e-06,
      "loss": 0.0047,
      "num_tokens": 804295249.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.20662352442741394,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943602085113525,
      "step": 1381
    },
    {
      "clip_ratio/high_max": 0.0016614306960036629,
      "clip_ratio/high_mean": 0.0005450017248449512,
      "clip_ratio/low_mean": 0.0003277860500929819,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008727877766432357,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3049.0,
      "completions/mean_length": 559.6350708007812,
      "completions/mean_terminated_length": 535.7943725585938,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 12.914552347623214,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.007,
      "num_tokens": 804852650.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.20760175585746765,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 1382
    },
    {
      "clip_ratio/high_max": 0.0017409946558473166,
      "clip_ratio/high_mean": 0.0005558953894251317,
      "clip_ratio/low_mean": 0.00034601925892729923,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009019146436912706,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2556.0,
      "completions/mean_length": 577.6361694335938,
      "completions/mean_terminated_length": 529.8756103515625,
      "completions/min_length": 72.0,
      "completions/min_terminated_length": 72.0,
      "epoch": 12.923884514435695,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 805418948.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.21684300899505615,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.4829172194004059,
      "step": 1383
    },
    {
      "clip_ratio/high_max": 0.001836070126046252,
      "clip_ratio/high_mean": 0.0005716071525512234,
      "clip_ratio/low_mean": 0.00031157654245816957,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008831836926219694,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3486.0,
      "completions/mean_length": 606.5234375,
      "completions/mean_terminated_length": 559.155029296875,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 12.933216681248178,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.008,
      "num_tokens": 806005665.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.20342515408992767,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 1384
    },
    {
      "clip_ratio/high_max": 0.0016403847803303506,
      "clip_ratio/high_mean": 0.0005073323721944689,
      "clip_ratio/low_mean": 0.00043890280744562915,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009462351745241904,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3239.0,
      "completions/mean_length": 646.333740234375,
      "completions/mean_terminated_length": 563.5416870117188,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 12.942548848060659,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0047,
      "num_tokens": 806587972.0,
      "reward": 0.566964328289032,
      "reward_std": 0.23066525161266327,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49577224254608154,
      "step": 1385
    },
    {
      "clip_ratio/high_max": 0.001550680462969467,
      "clip_ratio/high_mean": 0.0004051927962791524,
      "clip_ratio/low_mean": 0.0003070536570248805,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007122464498934278,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3795.0,
      "completions/mean_length": 615.6495971679688,
      "completions/mean_terminated_length": 564.4099731445312,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 12.951881014873141,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": -0.0001,
      "num_tokens": 807172562.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.1925002932548523,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756613254547,
      "step": 1386
    },
    {
      "clip_ratio/high_max": 0.0013207304273237241,
      "clip_ratio/high_mean": 0.00037152499521653226,
      "clip_ratio/low_mean": 0.0002873082819405681,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006588332821593212,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2246.0,
      "completions/mean_length": 544.3917846679688,
      "completions/mean_terminated_length": 520.4483032226562,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 12.961213181685622,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0025,
      "num_tokens": 807732345.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.18080630898475647,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 1387
    },
    {
      "clip_ratio/high_max": 0.001588572553373524,
      "clip_ratio/high_mean": 0.00046897146330593387,
      "clip_ratio/low_mean": 0.0003560639931947662,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008250354630945367,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3299.0,
      "completions/mean_length": 593.8895263671875,
      "completions/mean_terminated_length": 542.3295288085938,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 12.970545348498105,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0038,
      "num_tokens": 808298438.0,
      "reward": 0.520089328289032,
      "reward_std": 0.20482411980628967,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 1388
    },
    {
      "clip_ratio/high_max": 0.00182184676759789,
      "clip_ratio/high_mean": 0.0005334302263690915,
      "clip_ratio/low_mean": 0.0003179357873932531,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008513660141034052,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3071.0,
      "completions/mean_length": 615.411865234375,
      "completions/mean_terminated_length": 535.9463500976562,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 12.979877515310585,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 808859143.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.1967414915561676,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 1389
    },
    {
      "clip_ratio/high_max": 0.001653337037168967,
      "clip_ratio/high_mean": 0.0005041189310759364,
      "clip_ratio/low_mean": 0.00041369527457391087,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009178142099699471,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3034.0,
      "completions/mean_length": 656.328125,
      "completions/mean_terminated_length": 585.8109741210938,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 12.989209682123068,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.027,
      "num_tokens": 809462053.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.22368991374969482,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 1390
    },
    {
      "clip_ratio/high_max": 0.0015501457201025914,
      "clip_ratio/high_mean": 0.000498158777190838,
      "clip_ratio/low_mean": 0.00030075828976805496,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007989170762812137,
      "completions/clipped_ratio": 0.011363636363636354,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3766.0,
      "completions/mean_length": 645.76708984375,
      "completions/mean_terminated_length": 606.1091918945312,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 12.998541848935549,
      "grad_norm": 0.11279296875,
      "learning_rate": 1e-06,
      "loss": -0.0101,
      "num_tokens": 810060617.0,
      "reward": 0.5033482313156128,
      "reward_std": 0.2123749852180481,
      "rewards/verify_math_reward/mean": 0.5033482313156128,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 1391
    },
    {
      "clip_ratio/high_max": 0.0014908629564160947,
      "clip_ratio/high_mean": 0.00046155454947438557,
      "clip_ratio/low_mean": 0.0004029211452234449,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008644757017464144,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3670.0,
      "completions/mean_length": 638.1596069335938,
      "completions/mean_terminated_length": 567.2699584960938,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 13.009332166812483,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0074,
      "num_tokens": 810647328.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.21372976899147034,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 1392
    },
    {
      "clip_ratio/high_max": 0.0019903739121218678,
      "clip_ratio/high_mean": 0.000658585734981898,
      "clip_ratio/low_mean": 0.00034729962862911634,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001005885374979698,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3891.0,
      "completions/mean_length": 667.21875,
      "completions/mean_terminated_length": 588.93603515625,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 13.018664333624963,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": -0.0045,
      "num_tokens": 811242604.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.2144126296043396,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 1393
    },
    {
      "clip_ratio/high_max": 0.0017515275121695595,
      "clip_ratio/high_mean": 0.0005732004801757284,
      "clip_ratio/low_mean": 0.000437591222635092,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010107917132700095,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3998.0,
      "completions/mean_length": 641.200927734375,
      "completions/mean_terminated_length": 606.1465454101562,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 13.027996500437446,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0104,
      "num_tokens": 811870784.0,
      "reward": 0.4810267984867096,
      "reward_std": 0.24164429306983948,
      "rewards/verify_math_reward/mean": 0.4810267984867096,
      "rewards/verify_math_reward/std": 0.49991896748542786,
      "step": 1394
    },
    {
      "clip_ratio/high_max": 0.0017871686850412516,
      "clip_ratio/high_mean": 0.0005313890735578752,
      "clip_ratio/low_mean": 0.00039288076459342847,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000924269836559688,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2221.0,
      "completions/mean_length": 547.8861694335938,
      "completions/mean_terminated_length": 511.8849792480469,
      "completions/min_length": 90.0,
      "completions/min_terminated_length": 90.0,
      "epoch": 13.037328667249927,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.001,
      "num_tokens": 812412354.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.18922775983810425,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 1395
    },
    {
      "clip_ratio/high_max": 0.0012242884558872902,
      "clip_ratio/high_mean": 0.00033330466567349504,
      "clip_ratio/low_mean": 0.0003139990266163295,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006473036974057322,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3274.0,
      "completions/mean_length": 601.6160888671875,
      "completions/mean_terminated_length": 538.081787109375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 13.04666083406241,
      "grad_norm": 0.11767578125,
      "learning_rate": 1e-06,
      "loss": 0.0004,
      "num_tokens": 812977906.0,
      "reward": 0.5234375,
      "reward_std": 0.16010887920856476,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 1396
    },
    {
      "clip_ratio/high_max": 0.0014899282905389555,
      "clip_ratio/high_mean": 0.00045767979509037104,
      "clip_ratio/low_mean": 0.0003265507307332882,
      "clip_ratio/low_min": 1.0434057003294583e-05,
      "clip_ratio/region_mean": 0.0007842305321901222,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2518.0,
      "completions/mean_length": 578.6585083007812,
      "completions/mean_terminated_length": 534.9401245117188,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 13.05599300087489,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.0131,
      "num_tokens": 813544280.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.21176443994045258,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 1397
    },
    {
      "clip_ratio/high_max": 0.00198770187489572,
      "clip_ratio/high_mean": 0.0006603341907975846,
      "clip_ratio/low_mean": 0.0004361434478141746,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010964776374748908,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2008.0,
      "completions/mean_length": 573.0736694335938,
      "completions/mean_terminated_length": 533.3115234375,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 13.065325167687373,
      "grad_norm": 0.1611328125,
      "learning_rate": 1e-06,
      "loss": -0.0036,
      "num_tokens": 814094682.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.26373884081840515,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 1398
    },
    {
      "clip_ratio/high_max": 0.0017021954217852908,
      "clip_ratio/high_mean": 0.0005294370050705766,
      "clip_ratio/low_mean": 0.0003776409980673634,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009070780097317765,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3772.0,
      "completions/mean_length": 606.1317138671875,
      "completions/mean_terminated_length": 562.7548217773438,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 13.074657334499854,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0073,
      "num_tokens": 814681144.0,
      "reward": 0.546875,
      "reward_std": 0.21835143864154816,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 1399
    },
    {
      "clip_ratio/high_max": 0.0017483852880104678,
      "clip_ratio/high_mean": 0.0004886082367647759,
      "clip_ratio/low_mean": 0.0003104416707628843,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007990499070729129,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3630.0,
      "completions/mean_length": 634.896240234375,
      "completions/mean_terminated_length": 543.710205078125,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 13.083989501312336,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0028,
      "num_tokens": 815252739.0,
      "reward": 0.5625,
      "reward_std": 0.22161512076854706,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 1400
    },
    {
      "clip_ratio/high_max": 0.0018322273699595826,
      "clip_ratio/high_mean": 0.0005806700842185819,
      "clip_ratio/low_mean": 0.0003205304074072046,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009012004911710392,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3697.0,
      "completions/mean_length": 640.1529541015625,
      "completions/mean_terminated_length": 597.1989135742188,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 13.093321668124817,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0086,
      "num_tokens": 815863092.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.2427729368209839,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 1401
    },
    {
      "clip_ratio/high_max": 0.0016442310152342543,
      "clip_ratio/high_mean": 0.0005161672067970358,
      "clip_ratio/low_mean": 0.00031519197102625185,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008313591815749533,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3790.0,
      "completions/mean_length": 600.4319458007812,
      "completions/mean_terminated_length": 572.9077758789062,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 13.1026538349373,
      "grad_norm": 0.12158203125,
      "learning_rate": 1e-06,
      "loss": 0.0045,
      "num_tokens": 816464631.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.1950497031211853,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 1402
    },
    {
      "clip_ratio/high_max": 0.001461252688386594,
      "clip_ratio/high_mean": 0.0003993860646005487,
      "clip_ratio/low_mean": 0.00027730073372822517,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006766867950318556,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3808.0,
      "completions/mean_length": 595.8359375,
      "completions/mean_terminated_length": 544.3046264648438,
      "completions/min_length": 40.0,
      "completions/min_terminated_length": 40.0,
      "epoch": 13.11198600174978,
      "grad_norm": 0.1103515625,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 817040612.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.17084869742393494,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 1403
    },
    {
      "clip_ratio/high_max": 0.0015426845184265403,
      "clip_ratio/high_mean": 0.0004398583009788126,
      "clip_ratio/low_mean": 0.00029567180195044784,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007355300995186553,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2705.0,
      "completions/mean_length": 625.4710083007812,
      "completions/mean_terminated_length": 570.3832397460938,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 13.121318168562263,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.006,
      "num_tokens": 817635210.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.20688749849796295,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 1404
    },
    {
      "clip_ratio/high_max": 0.0011363478852217668,
      "clip_ratio/high_mean": 0.00033415246343793115,
      "clip_ratio/low_mean": 0.0003630368685207941,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006971893290028675,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4014.0,
      "completions/mean_length": 694.427490234375,
      "completions/mean_terminated_length": 636.511962890625,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 13.130650335374744,
      "grad_norm": 0.1083984375,
      "learning_rate": 1e-06,
      "loss": 0.0132,
      "num_tokens": 818291897.0,
      "reward": 0.4497767984867096,
      "reward_std": 0.19058279693126678,
      "rewards/verify_math_reward/mean": 0.4497767984867096,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 1405
    },
    {
      "clip_ratio/high_max": 0.0014495681762127788,
      "clip_ratio/high_mean": 0.00044815292858402245,
      "clip_ratio/low_mean": 0.0003247402498800511,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000772893182329426,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3978.0,
      "completions/mean_length": 674.888427734375,
      "completions/mean_terminated_length": 584.7560424804688,
      "completions/min_length": 72.0,
      "completions/min_terminated_length": 72.0,
      "epoch": 13.139982502187227,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.0005,
      "num_tokens": 818896533.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.20932631194591522,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 1406
    },
    {
      "clip_ratio/high_max": 0.0020581455355568323,
      "clip_ratio/high_mean": 0.0005896722814213717,
      "clip_ratio/low_mean": 0.0003218375377400662,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009115098077927541,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2483.0,
      "completions/mean_length": 592.114990234375,
      "completions/mean_terminated_length": 536.4977416992188,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 13.149314668999708,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0038,
      "num_tokens": 819450604.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.21711428463459015,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 1407
    },
    {
      "clip_ratio/high_max": 0.00174430378319812,
      "clip_ratio/high_mean": 0.00046465968171105487,
      "clip_ratio/low_mean": 0.0003192824785855919,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007839421646167466,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2591.0,
      "completions/mean_length": 665.7957763671875,
      "completions/mean_terminated_length": 559.2186279296875,
      "completions/min_length": 17.0,
      "completions/min_terminated_length": 17.0,
      "epoch": 13.15864683581219,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0086,
      "num_tokens": 820030469.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.19602863490581512,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 1408
    },
    {
      "clip_ratio/high_max": 0.0016913519211811945,
      "clip_ratio/high_mean": 0.0004851412346624784,
      "clip_ratio/low_mean": 0.0002790674901689272,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007642087211934268,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3897.0,
      "completions/mean_length": 587.4342041015625,
      "completions/mean_terminated_length": 535.7791748046875,
      "completions/min_length": 2.0,
      "completions/min_terminated_length": 2.0,
      "epoch": 13.167979002624673,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": 0.0024,
      "num_tokens": 820610154.0,
      "reward": 0.4877232313156128,
      "reward_std": 0.17490725219249725,
      "rewards/verify_math_reward/mean": 0.4877232015132904,
      "rewards/verify_math_reward/std": 0.500128448009491,
      "step": 1409
    },
    {
      "clip_ratio/high_max": 0.0017227934913535137,
      "clip_ratio/high_mean": 0.0005312004236657231,
      "clip_ratio/low_mean": 0.0003712047946464736,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009024052496897639,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2800.0,
      "completions/mean_length": 584.6261596679688,
      "completions/mean_terminated_length": 532.9297485351562,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 13.177311169437154,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.0039,
      "num_tokens": 821174931.0,
      "reward": 0.6350446939468384,
      "reward_std": 0.20339377224445343,
      "rewards/verify_math_reward/mean": 0.6350446343421936,
      "rewards/verify_math_reward/std": 0.481686532497406,
      "step": 1410
    },
    {
      "clip_ratio/high_max": 0.001779132331648725,
      "clip_ratio/high_mean": 0.0005360412642403389,
      "clip_ratio/low_mean": 0.0004373191009108268,
      "clip_ratio/low_min": 1.0632867997628637e-05,
      "clip_ratio/region_mean": 0.0009733603583299555,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3741.0,
      "completions/mean_length": 596.0971069335938,
      "completions/mean_terminated_length": 528.4083862304688,
      "completions/min_length": 42.0,
      "completions/min_terminated_length": 42.0,
      "epoch": 13.186643336249636,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0015,
      "num_tokens": 821723786.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.23736734688282013,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 1411
    },
    {
      "clip_ratio/high_max": 0.0015953919264575234,
      "clip_ratio/high_mean": 0.0004669561702712599,
      "clip_ratio/low_mean": 0.0004391115152202474,
      "clip_ratio/low_min": 1.5401676137116738e-05,
      "clip_ratio/region_mean": 0.0009060676734407025,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2364.0,
      "completions/mean_length": 606.1015625,
      "completions/mean_terminated_length": 570.6910400390625,
      "completions/min_length": 45.0,
      "completions/min_terminated_length": 45.0,
      "epoch": 13.195975503062117,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0017,
      "num_tokens": 822324837.0,
      "reward": 0.470982164144516,
      "reward_std": 0.22590160369873047,
      "rewards/verify_math_reward/mean": 0.4709821343421936,
      "rewards/verify_math_reward/std": 0.49943602085113525,
      "step": 1412
    },
    {
      "clip_ratio/high_max": 0.0014832056222076062,
      "clip_ratio/high_mean": 0.00043488904896094027,
      "clip_ratio/low_mean": 0.0003625766225923144,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007974656627993681,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4049.0,
      "completions/mean_length": 597.9230346679688,
      "completions/mean_terminated_length": 558.4413452148438,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 13.2053076698746,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": 0.0152,
      "num_tokens": 822908888.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.21504385769367218,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 1413
    },
    {
      "clip_ratio/high_max": 0.0016544329637326882,
      "clip_ratio/high_mean": 0.0005267449384973588,
      "clip_ratio/low_mean": 0.000406088698809981,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009328336482212762,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2479.0,
      "completions/mean_length": 630.1160888671875,
      "completions/mean_terminated_length": 563.0853271484375,
      "completions/min_length": 98.0,
      "completions/min_terminated_length": 98.0,
      "epoch": 13.21463983668708,
      "grad_norm": 0.150390625,
      "learning_rate": 1e-06,
      "loss": -0.0033,
      "num_tokens": 823492536.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.24904923141002655,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4982811510562897,
      "step": 1414
    },
    {
      "clip_ratio/high_max": 0.0016178957084775902,
      "clip_ratio/high_mean": 0.0004918382628602558,
      "clip_ratio/low_mean": 0.00030420989901358553,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007960481489135418,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3425.0,
      "completions/mean_length": 647.59375,
      "completions/mean_terminated_length": 588.880859375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 13.223972003499563,
      "grad_norm": 0.11962890625,
      "learning_rate": 1e-06,
      "loss": 0.0033,
      "num_tokens": 824107836.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.1931743025779724,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 1415
    },
    {
      "clip_ratio/high_max": 0.0018230609248348628,
      "clip_ratio/high_mean": 0.000527394566233852,
      "clip_ratio/low_mean": 0.0003095360673341929,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008369306497115758,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3961.0,
      "completions/mean_length": 553.671875,
      "completions/mean_terminated_length": 509.6429443359375,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 13.233304170312044,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": -0.0047,
      "num_tokens": 824654734.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.19846926629543304,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 1416
    },
    {
      "clip_ratio/high_max": 0.0016410772277595242,
      "clip_ratio/high_mean": 0.0005012257963699085,
      "clip_ratio/low_mean": 0.00030725744647952524,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008084832384156471,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2627.0,
      "completions/mean_length": 591.833740234375,
      "completions/mean_terminated_length": 572.1694946289062,
      "completions/min_length": 26.0,
      "completions/min_terminated_length": 26.0,
      "epoch": 13.242636337124527,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": 0.0046,
      "num_tokens": 825244777.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.20064888894557953,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448275566101074,
      "step": 1417
    },
    {
      "clip_ratio/high_max": 0.0015912118806227227,
      "clip_ratio/high_mean": 0.00045222694188851165,
      "clip_ratio/low_mean": 0.0002968669411416158,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007490938860428287,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2554.0,
      "completions/mean_length": 616.5569458007812,
      "completions/mean_terminated_length": 565.3306884765625,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 13.251968503937007,
      "grad_norm": 0.1103515625,
      "learning_rate": 1e-06,
      "loss": 0.0106,
      "num_tokens": 825837188.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.18659871816635132,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 1418
    },
    {
      "clip_ratio/high_max": 0.001553700647491496,
      "clip_ratio/high_mean": 0.0005001623684393053,
      "clip_ratio/low_mean": 0.0003352726730554423,
      "clip_ratio/low_min": 1.1007397006324027e-05,
      "clip_ratio/region_mean": 0.0008354350493391394,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2727.0,
      "completions/mean_length": 637.638427734375,
      "completions/mean_terminated_length": 594.6531372070312,
      "completions/min_length": 97.0,
      "completions/min_terminated_length": 97.0,
      "epoch": 13.26130067074949,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.0104,
      "num_tokens": 826452280.0,
      "reward": 0.543526828289032,
      "reward_std": 0.21951329708099365,
      "rewards/verify_math_reward/mean": 0.5435267686843872,
      "rewards/verify_math_reward/std": 0.49838000535964966,
      "step": 1419
    },
    {
      "clip_ratio/high_max": 0.0013382088554863003,
      "clip_ratio/high_mean": 0.0004431610839219502,
      "clip_ratio/low_mean": 0.0003010817285939993,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007442428141075652,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3921.0,
      "completions/mean_length": 586.2745971679688,
      "completions/mean_terminated_length": 522.4613647460938,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 13.27063283756197,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0048,
      "num_tokens": 827006926.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.2113974541425705,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 1420
    },
    {
      "clip_ratio/high_max": 0.0016555947495362489,
      "clip_ratio/high_mean": 0.00048758478237687086,
      "clip_ratio/low_mean": 0.00029536799843299377,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007829527758076438,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3334.0,
      "completions/mean_length": 552.4810791015625,
      "completions/mean_terminated_length": 504.37896728515625,
      "completions/min_length": 80.0,
      "completions/min_terminated_length": 80.0,
      "epoch": 13.279965004374453,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": -0.0096,
      "num_tokens": 827533645.0,
      "reward": 0.5625,
      "reward_std": 0.1928700953722,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 1421
    },
    {
      "clip_ratio/high_max": 0.00183762901724549,
      "clip_ratio/high_mean": 0.0005509638019702834,
      "clip_ratio/low_mean": 0.0003238330432395742,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008747968468014733,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3772.0,
      "completions/mean_length": 615.818115234375,
      "completions/mean_terminated_length": 556.5641479492188,
      "completions/min_length": 70.0,
      "completions/min_terminated_length": 70.0,
      "epoch": 13.289297171186934,
      "grad_norm": 0.11962890625,
      "learning_rate": 1e-06,
      "loss": -0.0046,
      "num_tokens": 828116050.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.20658259093761444,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4982811510562897,
      "step": 1422
    },
    {
      "clip_ratio/high_max": 0.001617481862922432,
      "clip_ratio/high_mean": 0.0004946846997881948,
      "clip_ratio/low_mean": 0.00028170491623313865,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000776389628299512,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3567.0,
      "completions/mean_length": 621.8560791015625,
      "completions/mean_terminated_length": 550.6321411132812,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 13.298629337999417,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0096,
      "num_tokens": 828688833.0,
      "reward": 0.59375,
      "reward_std": 0.21330790221691132,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 1423
    },
    {
      "clip_ratio/high_max": 0.001688760930846911,
      "clip_ratio/high_mean": 0.0005318934149727284,
      "clip_ratio/low_mean": 0.0003530990504714282,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008849924656715302,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3614.0,
      "completions/mean_length": 588.5614013671875,
      "completions/mean_terminated_length": 572.8330078125,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 13.307961504811898,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0083,
      "num_tokens": 829293224.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.22063983976840973,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 1424
    },
    {
      "clip_ratio/high_max": 0.0016055243195296498,
      "clip_ratio/high_mean": 0.00042092477588084876,
      "clip_ratio/low_mean": 0.0003820010576873756,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008029258333408507,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3871.0,
      "completions/mean_length": 624.0982666015625,
      "completions/mean_terminated_length": 572.9829711914062,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 13.31729367162438,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": 0.0233,
      "num_tokens": 829879392.0,
      "reward": 0.4665178656578064,
      "reward_std": 0.19441214203834534,
      "rewards/verify_math_reward/mean": 0.4665178656578064,
      "rewards/verify_math_reward/std": 0.49915632605552673,
      "step": 1425
    },
    {
      "clip_ratio/high_max": 0.001567191261528933,
      "clip_ratio/high_mean": 0.000487123599214101,
      "clip_ratio/low_mean": 0.0003807999275977636,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008679235115778283,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2740.0,
      "completions/mean_length": 571.1964721679688,
      "completions/mean_terminated_length": 547.4337158203125,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 13.326625838436861,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.0068,
      "num_tokens": 830451184.0,
      "reward": 0.578125,
      "reward_std": 0.19625869393348694,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 1426
    },
    {
      "clip_ratio/high_max": 0.0018317081994609907,
      "clip_ratio/high_mean": 0.0005963384990081977,
      "clip_ratio/low_mean": 0.00035139673855155706,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009477352523390437,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3920.0,
      "completions/mean_length": 665.9609375,
      "completions/mean_terminated_length": 587.6495361328125,
      "completions/min_length": 89.0,
      "completions/min_terminated_length": 89.0,
      "epoch": 13.335958005249344,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.0229,
      "num_tokens": 831052829.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.23206490278244019,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 1427
    },
    {
      "clip_ratio/high_max": 0.0015894861026026774,
      "clip_ratio/high_mean": 0.00048236274869850604,
      "clip_ratio/low_mean": 0.0003868053936457727,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008691681387062999,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4095.0,
      "completions/mean_length": 627.0859375,
      "completions/mean_terminated_length": 579.9966430664062,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 13.345290172061826,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": 0.0038,
      "num_tokens": 831653826.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.22409965097904205,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763264656067,
      "step": 1428
    },
    {
      "clip_ratio/high_max": 0.001791535582015058,
      "clip_ratio/high_mean": 0.0005492749760378501,
      "clip_ratio/low_mean": 0.00029483045170763944,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008441054242211976,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3727.0,
      "completions/mean_length": 574.4721069335938,
      "completions/mean_terminated_length": 522.6262817382812,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 13.354622338874307,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0092,
      "num_tokens": 832197241.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.1893460899591446,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791128396987915,
      "step": 1429
    },
    {
      "clip_ratio/high_max": 0.0019983718993898947,
      "clip_ratio/high_mean": 0.0005938726353633683,
      "clip_ratio/low_mean": 0.00038011708011254086,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009739897222971194,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2997.0,
      "completions/mean_length": 597.3225708007812,
      "completions/mean_terminated_length": 545.8131103515625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 13.36395450568679,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0015,
      "num_tokens": 832759258.0,
      "reward": 0.582589328289032,
      "reward_std": 0.21831823885440826,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.493407279253006,
      "step": 1430
    },
    {
      "clip_ratio/high_max": 0.0020299209836593946,
      "clip_ratio/high_mean": 0.000690493546471771,
      "clip_ratio/low_mean": 0.0003531034808474942,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010435970270918915,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2821.0,
      "completions/mean_length": 543.1473388671875,
      "completions/mean_terminated_length": 503.04742431640625,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 13.37328667249927,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": 0.0112,
      "num_tokens": 833300054.0,
      "reward": 0.598214328289032,
      "reward_std": 0.21846021711826324,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053290486335754,
      "step": 1431
    },
    {
      "clip_ratio/high_max": 0.0014582962166969082,
      "clip_ratio/high_mean": 0.0004073754737419222,
      "clip_ratio/low_mean": 0.0004089128991608959,
      "clip_ratio/low_min": 1.6267569662886672e-05,
      "clip_ratio/region_mean": 0.0008162883814293309,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4060.0,
      "completions/mean_length": 604.2421875,
      "completions/mean_terminated_length": 564.8318481445312,
      "completions/min_length": 25.0,
      "completions/min_terminated_length": 25.0,
      "epoch": 13.382618839311753,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0026,
      "num_tokens": 833892503.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.2141755372285843,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 1432
    },
    {
      "clip_ratio/high_max": 0.0016004578310457873,
      "clip_ratio/high_mean": 0.0005155829067007289,
      "clip_ratio/low_mean": 0.00028066771028534276,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007962506188050611,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3435.0,
      "completions/mean_length": 611.8671875,
      "completions/mean_terminated_length": 548.519287109375,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 13.391951006124234,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": 0.0063,
      "num_tokens": 834469672.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.20001837611198425,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 1433
    },
    {
      "clip_ratio/high_max": 0.0018618465674080653,
      "clip_ratio/high_mean": 0.0005791701880752953,
      "clip_ratio/low_mean": 0.00035893432254852087,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009381045229019946,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3498.0,
      "completions/mean_length": 556.9710083007812,
      "completions/mean_terminated_length": 504.86749267578125,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 13.401283172936717,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": 0.0184,
      "num_tokens": 835007846.0,
      "reward": 0.6171875,
      "reward_std": 0.20035536587238312,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 1434
    },
    {
      "clip_ratio/high_max": 0.0018208273340860615,
      "clip_ratio/high_mean": 0.0005466261397941707,
      "clip_ratio/low_mean": 0.0003893457421781932,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009359718769701431,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3442.0,
      "completions/mean_length": 598.4933471679688,
      "completions/mean_terminated_length": 547.0010986328125,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 13.410615339749198,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0017,
      "num_tokens": 835570056.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.20816446840763092,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 1435
    },
    {
      "clip_ratio/high_max": 0.002047443776973523,
      "clip_ratio/high_mean": 0.0006194652796693845,
      "clip_ratio/low_mean": 0.00033381256344000576,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009532778394714114,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3837.0,
      "completions/mean_length": 628.4654541015625,
      "completions/mean_terminated_length": 557.3770141601562,
      "completions/min_length": 74.0,
      "completions/min_terminated_length": 74.0,
      "epoch": 13.41994750656168,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0007,
      "num_tokens": 836158121.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.2154536247253418,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.4973994791507721,
      "step": 1436
    },
    {
      "clip_ratio/high_max": 0.0015440452807524707,
      "clip_ratio/high_mean": 0.0005259499757812591,
      "clip_ratio/low_mean": 0.0003068337643981067,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008327837385877501,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3063.0,
      "completions/mean_length": 630.5803833007812,
      "completions/mean_terminated_length": 583.5385131835938,
      "completions/min_length": 79.0,
      "completions/min_terminated_length": 79.0,
      "epoch": 13.429279673374161,
      "grad_norm": 0.11669921875,
      "learning_rate": 1e-06,
      "loss": 0.0058,
      "num_tokens": 836760785.0,
      "reward": 0.551339328289032,
      "reward_std": 0.2310425192117691,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 1437
    },
    {
      "clip_ratio/high_max": 0.0014194907153068925,
      "clip_ratio/high_mean": 0.00044470407328844885,
      "clip_ratio/low_mean": 0.00031971871101177385,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000764422770771489,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3281.0,
      "completions/mean_length": 586.0111694335938,
      "completions/mean_terminated_length": 530.2970581054688,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 13.438611840186644,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.0006,
      "num_tokens": 837320075.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.19204454123973846,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 1438
    },
    {
      "clip_ratio/high_max": 0.0017518545373604866,
      "clip_ratio/high_mean": 0.0004434636229007083,
      "clip_ratio/low_mean": 0.00035267474231659435,
      "clip_ratio/low_min": 1.033741318678949e-05,
      "clip_ratio/region_mean": 0.0007961383798829047,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3600.0,
      "completions/mean_length": 645.625,
      "completions/mean_terminated_length": 570.8734130859375,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 13.447944006999125,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": -0.0013,
      "num_tokens": 837916267.0,
      "reward": 0.5111607313156128,
      "reward_std": 0.18829189240932465,
      "rewards/verify_math_reward/mean": 0.5111607313156128,
      "rewards/verify_math_reward/std": 0.5001546144485474,
      "step": 1439
    },
    {
      "clip_ratio/high_max": 0.001641486342123244,
      "clip_ratio/high_mean": 0.0005413385501924495,
      "clip_ratio/low_mean": 0.00036927487428783934,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009106134311878122,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2109.0,
      "completions/mean_length": 612.4096069335938,
      "completions/mean_terminated_length": 573.0914306640625,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 13.457276173811607,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0213,
      "num_tokens": 838507378.0,
      "reward": 0.5870535969734192,
      "reward_std": 0.21797415614128113,
      "rewards/verify_math_reward/mean": 0.5870535969734192,
      "rewards/verify_math_reward/std": 0.49263834953308105,
      "step": 1440
    },
    {
      "clip_ratio/high_max": 0.0014924777042324422,
      "clip_ratio/high_mean": 0.0004368835213881539,
      "clip_ratio/low_mean": 0.00033707786394643335,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007739613852209004,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4084.0,
      "completions/mean_length": 617.4754638671875,
      "completions/mean_terminated_length": 586.1373901367188,
      "completions/min_length": 38.0,
      "completions/min_terminated_length": 38.0,
      "epoch": 13.466608340624088,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": -0.0008,
      "num_tokens": 839119572.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.19858945906162262,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 1441
    },
    {
      "clip_ratio/high_max": 0.001487494842876913,
      "clip_ratio/high_mean": 0.00041717613407854515,
      "clip_ratio/low_mean": 0.0002736194227281885,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006907955425958789,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2231.0,
      "completions/mean_length": 612.1451416015625,
      "completions/mean_terminated_length": 552.82861328125,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 13.47594050743657,
      "grad_norm": 0.111328125,
      "learning_rate": 1e-06,
      "loss": 0.001,
      "num_tokens": 839693350.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.1565753072500229,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321892857551575,
      "step": 1442
    },
    {
      "clip_ratio/high_max": 0.0020271714784030337,
      "clip_ratio/high_mean": 0.0006483817498974531,
      "clip_ratio/low_mean": 0.0003719502656167606,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010203320016444195,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2159.0,
      "completions/mean_length": 574.8359375,
      "completions/mean_terminated_length": 527.037353515625,
      "completions/min_length": 94.0,
      "completions/min_terminated_length": 94.0,
      "epoch": 13.485272674249051,
      "grad_norm": 0.1513671875,
      "learning_rate": 1e-06,
      "loss": 0.0039,
      "num_tokens": 840259067.0,
      "reward": 0.582589328289032,
      "reward_std": 0.23311659693717957,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.4934072494506836,
      "step": 1443
    },
    {
      "clip_ratio/high_max": 0.001726871063510771,
      "clip_ratio/high_mean": 0.0005453907610899478,
      "clip_ratio/low_mean": 0.00031528299075489485,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008606737665104447,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2624.0,
      "completions/mean_length": 629.575927734375,
      "completions/mean_terminated_length": 558.51025390625,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 13.494604841061534,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": -0.0026,
      "num_tokens": 840837527.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.23172719776630402,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 1444
    },
    {
      "clip_ratio/high_max": 0.001498488769357209,
      "clip_ratio/high_mean": 0.0004240192091629069,
      "clip_ratio/low_mean": 0.0003190860571748999,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007431052504216495,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3298.0,
      "completions/mean_length": 603.7779541015625,
      "completions/mean_terminated_length": 540.282958984375,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 13.503937007874015,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0081,
      "num_tokens": 841403920.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.21229758858680725,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 1445
    },
    {
      "clip_ratio/high_max": 0.0016854322220751783,
      "clip_ratio/high_mean": 0.00044098968282924034,
      "clip_ratio/low_mean": 0.0003208507280305639,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007618404097229359,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3324.0,
      "completions/mean_length": 632.3270263671875,
      "completions/mean_terminated_length": 569.3511352539062,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 13.513269174686497,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0082,
      "num_tokens": 841988813.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.19512708485126495,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756911277771,
      "step": 1446
    },
    {
      "clip_ratio/high_max": 0.0015337583636210184,
      "clip_ratio/high_mean": 0.0004813868329165416,
      "clip_ratio/low_mean": 0.0003618764460497914,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000843263269871386,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3126.0,
      "completions/mean_length": 604.5870971679688,
      "completions/mean_terminated_length": 549.1677856445312,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 13.52260134149898,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.0173,
      "num_tokens": 842575123.0,
      "reward": 0.4966517984867096,
      "reward_std": 0.19967137277126312,
      "rewards/verify_math_reward/mean": 0.4966517984867096,
      "rewards/verify_math_reward/std": 0.5002680420875549,
      "step": 1447
    },
    {
      "clip_ratio/high_max": 0.001459809553125524,
      "clip_ratio/high_mean": 0.0004290812088356688,
      "clip_ratio/low_mean": 0.00036512190786197607,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007942031129459792,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3931.0,
      "completions/mean_length": 609.6484375,
      "completions/mean_terminated_length": 558.3204956054688,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 13.531933508311461,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.0128,
      "num_tokens": 843149120.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.20041997730731964,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 1448
    },
    {
      "clip_ratio/high_max": 0.00186413984556566,
      "clip_ratio/high_mean": 0.0006101725743974384,
      "clip_ratio/low_mean": 0.0004602289575359464,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010704015476221684,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3993.0,
      "completions/mean_length": 579.1674194335938,
      "completions/mean_terminated_length": 527.3906860351562,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 13.541265675123944,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": 0.0251,
      "num_tokens": 843704486.0,
      "reward": 0.5770089626312256,
      "reward_std": 0.2458515763282776,
      "rewards/verify_math_reward/mean": 0.5770089030265808,
      "rewards/verify_math_reward/std": 0.4943099617958069,
      "step": 1449
    },
    {
      "clip_ratio/high_max": 0.0016106554185171262,
      "clip_ratio/high_mean": 0.00058202399941365,
      "clip_ratio/low_mean": 0.0003739048650004406,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009559288673699484,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3328.0,
      "completions/mean_length": 622.8303833007812,
      "completions/mean_terminated_length": 547.5849609375,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 13.550597841936424,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0053,
      "num_tokens": 844285206.0,
      "reward": 0.5725446939468384,
      "reward_std": 0.2361224889755249,
      "rewards/verify_math_reward/mean": 0.5725446343421936,
      "rewards/verify_math_reward/std": 0.49498558044433594,
      "step": 1450
    },
    {
      "clip_ratio/high_max": 0.001712963504360232,
      "clip_ratio/high_mean": 0.0005939340901477408,
      "clip_ratio/low_mean": 0.0003948019037807171,
      "clip_ratio/low_min": 9.701955605123658e-06,
      "clip_ratio/region_mean": 0.0009887360083666863,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3746.0,
      "completions/mean_length": 633.4609375,
      "completions/mean_terminated_length": 558.44580078125,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 13.559930008748907,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": 0.004,
      "num_tokens": 844855227.0,
      "reward": 0.59375,
      "reward_std": 0.250397264957428,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 1451
    },
    {
      "clip_ratio/high_max": 0.0017048090921889525,
      "clip_ratio/high_mean": 0.0005401893226917309,
      "clip_ratio/low_mean": 0.0003728963090452453,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009130856333285919,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3744.0,
      "completions/mean_length": 581.3314819335938,
      "completions/mean_terminated_length": 541.6625366210938,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 13.569262175561388,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0075,
      "num_tokens": 845418372.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.24142493307590485,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 1452
    },
    {
      "clip_ratio/high_max": 0.0016730862444092054,
      "clip_ratio/high_mean": 0.0005307649726091768,
      "clip_ratio/low_mean": 0.0003588253082398296,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008895902801668853,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3208.0,
      "completions/mean_length": 579.8125,
      "completions/mean_terminated_length": 552.1259765625,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 13.57859434237387,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0114,
      "num_tokens": 845995460.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.24296332895755768,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 1453
    },
    {
      "clip_ratio/high_max": 0.0017078154978662496,
      "clip_ratio/high_mean": 0.0005719310120184673,
      "clip_ratio/low_mean": 0.00034637552198546473,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009183065421893843,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1959.0,
      "completions/mean_length": 570.5569458007812,
      "completions/mean_terminated_length": 526.7378540039062,
      "completions/min_length": 108.0,
      "completions/min_terminated_length": 108.0,
      "epoch": 13.587926509186351,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0037,
      "num_tokens": 846555135.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.20906810462474823,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 1454
    },
    {
      "clip_ratio/high_max": 0.0016499298199050827,
      "clip_ratio/high_mean": 0.0005033507677580928,
      "clip_ratio/low_mean": 0.00030773112473525543,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008110818980640033,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3552.0,
      "completions/mean_length": 636.849365234375,
      "completions/mean_terminated_length": 577.9534912109375,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 13.597258675998834,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0079,
      "num_tokens": 847160584.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.22910040616989136,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 1455
    },
    {
      "clip_ratio/high_max": 0.001650757820243598,
      "clip_ratio/high_mean": 0.000492941169113692,
      "clip_ratio/low_mean": 0.00042119161321352294,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009141327755060047,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3765.0,
      "completions/mean_length": 676.140625,
      "completions/mean_terminated_length": 594.06396484375,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 13.606590842811315,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0045,
      "num_tokens": 847776766.0,
      "reward": 0.4977678656578064,
      "reward_std": 0.2218119502067566,
      "rewards/verify_math_reward/mean": 0.4977678656578064,
      "rewards/verify_math_reward/std": 0.5002743005752563,
      "step": 1456
    },
    {
      "clip_ratio/high_max": 0.0014967691386118531,
      "clip_ratio/high_mean": 0.00041895810500136577,
      "clip_ratio/low_mean": 0.0003660171489627828,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000784975256465259,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2809.0,
      "completions/mean_length": 641.1127319335938,
      "completions/mean_terminated_length": 594.2138061523438,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 13.615923009623797,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": -0.0007,
      "num_tokens": 848388547.0,
      "reward": 0.5379464626312256,
      "reward_std": 0.20102757215499878,
      "rewards/verify_math_reward/mean": 0.5379464030265808,
      "rewards/verify_math_reward/std": 0.4988364577293396,
      "step": 1457
    },
    {
      "clip_ratio/high_max": 0.0019857244224112947,
      "clip_ratio/high_mean": 0.0006267321759878541,
      "clip_ratio/low_mean": 0.0003349709638769127,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009617031373636564,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2069.0,
      "completions/mean_length": 547.4420166015625,
      "completions/mean_terminated_length": 503.3356018066406,
      "completions/min_length": 104.0,
      "completions/min_terminated_length": 104.0,
      "epoch": 13.625255176436278,
      "grad_norm": 0.14453125,
      "learning_rate": 1e-06,
      "loss": 0.0035,
      "num_tokens": 848923095.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.20749297738075256,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 1458
    },
    {
      "clip_ratio/high_max": 0.0014709434744872851,
      "clip_ratio/high_mean": 0.0004454475731563434,
      "clip_ratio/low_mean": 0.00033659933069429826,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000782046911353973,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2127.0,
      "completions/mean_length": 630.044677734375,
      "completions/mean_terminated_length": 579.0169677734375,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 13.63458734324876,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": -0.0051,
      "num_tokens": 849530199.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.22109587490558624,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 1459
    },
    {
      "clip_ratio/high_max": 0.0015920209752948722,
      "clip_ratio/high_mean": 0.0005085988584596635,
      "clip_ratio/low_mean": 0.00031790173829904234,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008265006072178949,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2603.0,
      "completions/mean_length": 620.5279541015625,
      "completions/mean_terminated_length": 581.3013916015625,
      "completions/min_length": 74.0,
      "completions/min_terminated_length": 74.0,
      "epoch": 13.643919510061242,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0,
      "num_tokens": 850140056.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.227440744638443,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 1460
    },
    {
      "clip_ratio/high_max": 0.0019153909706801642,
      "clip_ratio/high_mean": 0.0006843581222710782,
      "clip_ratio/low_mean": 0.00037447444765348337,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010588325430944678,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3885.0,
      "completions/mean_length": 654.609375,
      "completions/mean_terminated_length": 588.0523071289062,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 13.653251676873724,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": 0.0048,
      "num_tokens": 850737466.0,
      "reward": 0.59375,
      "reward_std": 0.26144862174987793,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 1461
    },
    {
      "clip_ratio/high_max": 0.0016420078009105055,
      "clip_ratio/high_mean": 0.000542934335044265,
      "clip_ratio/low_mean": 0.000367502664175845,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009104369910346577,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3300.0,
      "completions/mean_length": 595.6517944335938,
      "completions/mean_terminated_length": 540.0906982421875,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 13.662583843686207,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0143,
      "num_tokens": 851305154.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.22646361589431763,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 1462
    },
    {
      "clip_ratio/high_max": 0.001556218838231871,
      "clip_ratio/high_mean": 0.00041317227328363515,
      "clip_ratio/low_mean": 0.00039252614828910737,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008056984324866789,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3323.0,
      "completions/mean_length": 643.9765625,
      "completions/mean_terminated_length": 581.2124633789062,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "epoch": 13.671916010498688,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.0037,
      "num_tokens": 851904061.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.20771758258342743,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 1463
    },
    {
      "clip_ratio/high_max": 0.0016773997294876608,
      "clip_ratio/high_mean": 0.0005545242436255648,
      "clip_ratio/low_mean": 0.0003654265331078932,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009199507826451736,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2845.0,
      "completions/mean_length": 625.880615234375,
      "completions/mean_terminated_length": 566.7979736328125,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 13.68124817731117,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.003,
      "num_tokens": 852488098.0,
      "reward": 0.5145089626312256,
      "reward_std": 0.2098594754934311,
      "rewards/verify_math_reward/mean": 0.5145089030265808,
      "rewards/verify_math_reward/std": 0.5000685453414917,
      "step": 1464
    },
    {
      "clip_ratio/high_max": 0.0015371305908047361,
      "clip_ratio/high_mean": 0.00045309656434255885,
      "clip_ratio/low_mean": 0.0004256896056631376,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008787861770542804,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2646.0,
      "completions/mean_length": 590.6763916015625,
      "completions/mean_terminated_length": 535.0363159179688,
      "completions/min_length": 60.0,
      "completions/min_terminated_length": 60.0,
      "epoch": 13.690580344123651,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": -0.0019,
      "num_tokens": 853056440.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.24130292236804962,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796950817108154,
      "step": 1465
    },
    {
      "clip_ratio/high_max": 0.00148297511623241,
      "clip_ratio/high_mean": 0.00047140210608631605,
      "clip_ratio/low_mean": 0.0002479121314991062,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007193142419055221,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3967.0,
      "completions/mean_length": 594.1495971679688,
      "completions/mean_terminated_length": 554.6253051757812,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 13.699912510936134,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0126,
      "num_tokens": 853626206.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.21301506459712982,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.4937761425971985,
      "step": 1466
    },
    {
      "clip_ratio/high_max": 0.0017450528794142883,
      "clip_ratio/high_mean": 0.0005463054749270668,
      "clip_ratio/low_mean": 0.00040349560276808916,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009498010849711136,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3506.0,
      "completions/mean_length": 656.2210083007812,
      "completions/mean_terminated_length": 573.666259765625,
      "completions/min_length": 29.0,
      "completions/min_terminated_length": 29.0,
      "epoch": 13.709244677748615,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": 0.0021,
      "num_tokens": 854212148.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.22443737089633942,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 1467
    },
    {
      "clip_ratio/high_max": 0.0016002151533029974,
      "clip_ratio/high_mean": 0.0004723479805761599,
      "clip_ratio/low_mean": 0.0003160101001640214,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007883580801717471,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3317.0,
      "completions/mean_length": 605.2433471679688,
      "completions/mean_terminated_length": 537.7315063476562,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 13.718576844561097,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0075,
      "num_tokens": 854786334.0,
      "reward": 0.5078125,
      "reward_std": 0.19899989664554596,
      "rewards/verify_math_reward/mean": 0.5078125,
      "rewards/verify_math_reward/std": 0.5002182126045227,
      "step": 1468
    },
    {
      "clip_ratio/high_max": 0.0017225371939275647,
      "clip_ratio/high_mean": 0.0005235312387412705,
      "clip_ratio/low_mean": 0.00027352411507308716,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000797055363364052,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3139.0,
      "completions/mean_length": 653.8739013671875,
      "completions/mean_terminated_length": 618.9481201171875,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 13.727909011373578,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": 0.002,
      "num_tokens": 855425949.0,
      "reward": 0.5323660969734192,
      "reward_std": 0.21879860758781433,
      "rewards/verify_math_reward/mean": 0.5323660969734192,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 1469
    },
    {
      "clip_ratio/high_max": 0.0018507699023757596,
      "clip_ratio/high_mean": 0.0005679615915141767,
      "clip_ratio/low_mean": 0.00032465870572195854,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008926203026931034,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3025.0,
      "completions/mean_length": 629.3917846679688,
      "completions/mean_terminated_length": 570.368896484375,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 13.73724117818606,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.0013,
      "num_tokens": 856008156.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.20347636938095093,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.49223825335502625,
      "step": 1470
    },
    {
      "clip_ratio/high_max": 0.0014815277327215881,
      "clip_ratio/high_mean": 0.0004707739190052962,
      "clip_ratio/low_mean": 0.0004055853219142591,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008763592486502603,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3152.0,
      "completions/mean_length": 618.7120971679688,
      "completions/mean_terminated_length": 555.4886474609375,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 13.746573344998541,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0138,
      "num_tokens": 856585322.0,
      "reward": 0.559151828289032,
      "reward_std": 0.22946879267692566,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 1471
    },
    {
      "clip_ratio/high_max": 0.0020694473441835726,
      "clip_ratio/high_mean": 0.0006478015011452953,
      "clip_ratio/low_mean": 0.0003714814356499119,
      "clip_ratio/low_min": 1.5798786989762448e-05,
      "clip_ratio/region_mean": 0.0010192829267907655,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3581.0,
      "completions/mean_length": 597.46875,
      "completions/mean_terminated_length": 557.98193359375,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 13.755905511811024,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.0051,
      "num_tokens": 857178438.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.22695399820804596,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 1472
    },
    {
      "clip_ratio/high_max": 0.0012901640693598893,
      "clip_ratio/high_mean": 0.000432423684742389,
      "clip_ratio/low_mean": 0.0003156316291779149,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000748055313124496,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3801.0,
      "completions/mean_length": 622.8292846679688,
      "completions/mean_terminated_length": 587.5885009765625,
      "completions/min_length": 6.0,
      "completions/min_terminated_length": 6.0,
      "epoch": 13.765237678623505,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": 0.0134,
      "num_tokens": 857783621.0,
      "reward": 0.546875,
      "reward_std": 0.20542281866073608,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 1473
    },
    {
      "clip_ratio/high_max": 0.001881788402897655,
      "clip_ratio/high_mean": 0.0005327477763330535,
      "clip_ratio/low_mean": 0.0002711350437039073,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008038828218559502,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3704.0,
      "completions/mean_length": 636.6517944335938,
      "completions/mean_terminated_length": 569.7474365234375,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "epoch": 13.774569845435988,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0043,
      "num_tokens": 858382333.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.21537764370441437,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 1474
    },
    {
      "clip_ratio/high_max": 0.0013809626379952533,
      "clip_ratio/high_mean": 0.0003932869999516697,
      "clip_ratio/low_mean": 0.0003436417129023539,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00073692872592801,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4022.0,
      "completions/mean_length": 694.4777221679688,
      "completions/mean_terminated_length": 624.7426147460938,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 13.783902012248468,
      "grad_norm": 0.11328125,
      "learning_rate": 1e-06,
      "loss": 0.0118,
      "num_tokens": 859024601.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.1976444274187088,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 1475
    },
    {
      "clip_ratio/high_max": 0.0016969669432000956,
      "clip_ratio/high_mean": 0.00048578004702903854,
      "clip_ratio/low_mean": 0.0002999873190674407,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007857673626858741,
      "completions/clipped_ratio": 0.005580357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4030.0,
      "completions/mean_length": 542.716552734375,
      "completions/mean_terminated_length": 522.7766723632812,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 13.793234179060951,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0172,
      "num_tokens": 859586923.0,
      "reward": 0.6015625,
      "reward_std": 0.19178099930286407,
      "rewards/verify_math_reward/mean": 0.6015625,
      "rewards/verify_math_reward/std": 0.48984986543655396,
      "step": 1476
    },
    {
      "clip_ratio/high_max": 0.0015827381357667036,
      "clip_ratio/high_mean": 0.0005133775662216067,
      "clip_ratio/low_mean": 0.00032172162298138574,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008350991911356687,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3883.0,
      "completions/mean_length": 682.9141235351562,
      "completions/mean_terminated_length": 608.9703369140625,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 13.802566345873432,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": 0.0094,
      "num_tokens": 860216302.0,
      "reward": 0.5066964626312256,
      "reward_std": 0.2126799076795578,
      "rewards/verify_math_reward/mean": 0.5066964030265808,
      "rewards/verify_math_reward/std": 0.5002344250679016,
      "step": 1477
    },
    {
      "clip_ratio/high_max": 0.0015752986255392898,
      "clip_ratio/high_mean": 0.0005237242266957765,
      "clip_ratio/low_mean": 0.00028728788254284154,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008110121170830098,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2457.0,
      "completions/mean_length": 579.997802734375,
      "completions/mean_terminated_length": 556.2943725585938,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 13.811898512685914,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": 0.0097,
      "num_tokens": 860789844.0,
      "reward": 0.6316964626312256,
      "reward_std": 0.19219858944416046,
      "rewards/verify_math_reward/mean": 0.6316964030265808,
      "rewards/verify_math_reward/std": 0.4826137125492096,
      "step": 1478
    },
    {
      "clip_ratio/high_max": 0.0014719496630277717,
      "clip_ratio/high_mean": 0.00041756537757464685,
      "clip_ratio/low_mean": 0.0003222300886136509,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000739795465960924,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3409.0,
      "completions/mean_length": 629.3136596679688,
      "completions/mean_terminated_length": 574.286865234375,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 13.821230679498395,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": -0.0004,
      "num_tokens": 861383901.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.21068249642848969,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.4999600946903229,
      "step": 1479
    },
    {
      "clip_ratio/high_max": 0.0021846755626029335,
      "clip_ratio/high_mean": 0.0005392227506035852,
      "clip_ratio/low_mean": 0.00035902116201214085,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008982439121609787,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3969.0,
      "completions/mean_length": 601.7288208007812,
      "completions/mean_terminated_length": 558.2971801757812,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 13.830562846310878,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0041,
      "num_tokens": 861962402.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.17645888030529022,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4855247139930725,
      "step": 1480
    },
    {
      "clip_ratio/high_max": 0.0016209942241403041,
      "clip_ratio/high_mean": 0.0004635233783574222,
      "clip_ratio/low_mean": 0.00024232734313045512,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007058507262627245,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3913.0,
      "completions/mean_length": 641.6986694335938,
      "completions/mean_terminated_length": 574.8919067382812,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 13.83989501312336,
      "grad_norm": 0.11083984375,
      "learning_rate": 1e-06,
      "loss": 0.0028,
      "num_tokens": 862550524.0,
      "reward": 0.5948660969734192,
      "reward_std": 0.17025348544120789,
      "rewards/verify_math_reward/mean": 0.5948660969734192,
      "rewards/verify_math_reward/std": 0.49119213223457336,
      "step": 1481
    },
    {
      "clip_ratio/high_max": 0.001509284583335102,
      "clip_ratio/high_mean": 0.0004057079880794845,
      "clip_ratio/low_mean": 0.0003302452535081102,
      "clip_ratio/low_min": 1.0589630619506352e-05,
      "clip_ratio/region_mean": 0.0007359532310147188,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4012.0,
      "completions/mean_length": 730.3672485351562,
      "completions/mean_terminated_length": 641.6964721679688,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 13.849227179935841,
      "grad_norm": 0.11865234375,
      "learning_rate": 1e-06,
      "loss": -0.0041,
      "num_tokens": 863210509.0,
      "reward": 0.4676339626312256,
      "reward_std": 0.19381438195705414,
      "rewards/verify_math_reward/mean": 0.4676339328289032,
      "rewards/verify_math_reward/std": 0.4992299973964691,
      "step": 1482
    },
    {
      "clip_ratio/high_max": 0.0016020727962313686,
      "clip_ratio/high_mean": 0.00046571843358833576,
      "clip_ratio/low_mean": 0.00030765953374611854,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007733779739282909,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3446.0,
      "completions/mean_length": 616.265625,
      "completions/mean_terminated_length": 576.990966796875,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 13.858559346748324,
      "grad_norm": 0.12255859375,
      "learning_rate": 1e-06,
      "loss": 0.0212,
      "num_tokens": 863814819.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.192914679646492,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.4995608627796173,
      "step": 1483
    },
    {
      "clip_ratio/high_max": 0.0017778941637516255,
      "clip_ratio/high_mean": 0.0005326547875483811,
      "clip_ratio/low_mean": 0.0003499721511843745,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008826269177006907,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2545.0,
      "completions/mean_length": 585.7154541015625,
      "completions/mean_terminated_length": 558.0753784179688,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 13.867891513560805,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0082,
      "num_tokens": 864400516.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.2339816838502884,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 1484
    },
    {
      "clip_ratio/high_max": 0.0016853345314302715,
      "clip_ratio/high_mean": 0.0004785012778256714,
      "clip_ratio/low_mean": 0.0004052058570778172,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008837071363814175,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3814.0,
      "completions/mean_length": 595.6517944335938,
      "completions/mean_terminated_length": 544.1177368164062,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 13.877223680373287,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 864962372.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.23131422698497772,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 1485
    },
    {
      "clip_ratio/high_max": 0.0013008546666242182,
      "clip_ratio/high_mean": 0.00036349351762510196,
      "clip_ratio/low_mean": 0.0003107999082203605,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006742934281191992,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3840.0,
      "completions/mean_length": 673.2779541015625,
      "completions/mean_terminated_length": 562.8674926757812,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 13.886555847185768,
      "grad_norm": 0.1064453125,
      "learning_rate": 1e-06,
      "loss": -0.0099,
      "num_tokens": 865537853.0,
      "reward": 0.5457589626312256,
      "reward_std": 0.16743122041225433,
      "rewards/verify_math_reward/mean": 0.5457589030265808,
      "rewards/verify_math_reward/std": 0.4981797933578491,
      "step": 1486
    },
    {
      "clip_ratio/high_max": 0.0015954834507283522,
      "clip_ratio/high_mean": 0.000467176688289328,
      "clip_ratio/low_mean": 0.00035002035974684986,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008171970466719358,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3150.0,
      "completions/mean_length": 612.8671875,
      "completions/mean_terminated_length": 545.5028076171875,
      "completions/min_length": 77.0,
      "completions/min_terminated_length": 77.0,
      "epoch": 13.89588801399825,
      "grad_norm": 0.119140625,
      "learning_rate": 1e-06,
      "loss": 0.0214,
      "num_tokens": 866106718.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.19745828211307526,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 1487
    },
    {
      "clip_ratio/high_max": 0.0015423828590428457,
      "clip_ratio/high_mean": 0.0004908759989348255,
      "clip_ratio/low_mean": 0.00025817459322752256,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007490505854548246,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3394.0,
      "completions/mean_length": 565.6908569335938,
      "completions/mean_terminated_length": 533.8862915039062,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 13.905220180810732,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": 0.0149,
      "num_tokens": 866658545.0,
      "reward": 0.6194196939468384,
      "reward_std": 0.1958438754081726,
      "rewards/verify_math_reward/mean": 0.6194196343421936,
      "rewards/verify_math_reward/std": 0.48580074310302734,
      "step": 1488
    },
    {
      "clip_ratio/high_max": 0.0019100382014585193,
      "clip_ratio/high_mean": 0.0006178386640840472,
      "clip_ratio/low_mean": 0.00033591721921766293,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009537558908050414,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2454.0,
      "completions/mean_length": 571.3995971679688,
      "completions/mean_terminated_length": 547.63818359375,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 13.914552347623214,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": -0.0011,
      "num_tokens": 867240855.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.24299722909927368,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 1489
    },
    {
      "clip_ratio/high_max": 0.0015912617272988427,
      "clip_ratio/high_mean": 0.0005427971918834373,
      "clip_ratio/low_mean": 0.0003104864886154246,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00085328366367321,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3327.0,
      "completions/mean_length": 603.4788208007812,
      "completions/mean_terminated_length": 556.0690307617188,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 13.923884514435695,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0112,
      "num_tokens": 867820820.0,
      "reward": 0.578125,
      "reward_std": 0.225075364112854,
      "rewards/verify_math_reward/mean": 0.578125,
      "rewards/verify_math_reward/std": 0.4941346049308777,
      "step": 1490
    },
    {
      "clip_ratio/high_max": 0.0014498585023829946,
      "clip_ratio/high_mean": 0.0005081321653506166,
      "clip_ratio/low_mean": 0.0004397898109118614,
      "clip_ratio/low_min": 9.987216799345333e-06,
      "clip_ratio/region_mean": 0.0009479219620516233,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2611.0,
      "completions/mean_length": 617.09375,
      "completions/mean_terminated_length": 577.8284301757812,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 13.933216681248178,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0169,
      "num_tokens": 868420024.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.23811551928520203,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943602085113525,
      "step": 1491
    },
    {
      "clip_ratio/high_max": 0.0015793536795172258,
      "clip_ratio/high_mean": 0.0004564840305647522,
      "clip_ratio/low_mean": 0.0003703847811493688,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008268688220596232,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2015.0,
      "completions/mean_length": 645.7232666015625,
      "completions/mean_terminated_length": 558.8741455078125,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 13.942548848060659,
      "grad_norm": 0.12890625,
      "learning_rate": 1e-06,
      "loss": -0.0045,
      "num_tokens": 868997648.0,
      "reward": 0.5189732313156128,
      "reward_std": 0.2004987597465515,
      "rewards/verify_math_reward/mean": 0.5189732313156128,
      "rewards/verify_math_reward/std": 0.49991893768310547,
      "step": 1492
    },
    {
      "clip_ratio/high_max": 0.0015132897642615717,
      "clip_ratio/high_mean": 0.0004594343321286942,
      "clip_ratio/low_mean": 0.0003326939034877796,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007921282267489005,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1969.0,
      "completions/mean_length": 571.8192138671875,
      "completions/mean_terminated_length": 523.9796752929688,
      "completions/min_length": 32.0,
      "completions/min_terminated_length": 32.0,
      "epoch": 13.951881014873141,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0025,
      "num_tokens": 869548150.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.20824255049228668,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 1493
    },
    {
      "clip_ratio/high_max": 0.0014417561378650134,
      "clip_ratio/high_mean": 0.00041261564365413506,
      "clip_ratio/low_mean": 0.0003567041931091808,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007693198467677576,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2938.0,
      "completions/mean_length": 661.607177734375,
      "completions/mean_terminated_length": 595.1854248046875,
      "completions/min_length": 80.0,
      "completions/min_terminated_length": 80.0,
      "epoch": 13.961213181685622,
      "grad_norm": 0.11328125,
      "learning_rate": 1e-06,
      "loss": 0.0008,
      "num_tokens": 870167582.0,
      "reward": 0.5345982313156128,
      "reward_std": 0.20200368762016296,
      "rewards/verify_math_reward/mean": 0.5345982313156128,
      "rewards/verify_math_reward/std": 0.4990801215171814,
      "step": 1494
    },
    {
      "clip_ratio/high_max": 0.0016464744785480434,
      "clip_ratio/high_mean": 0.0004716879473107838,
      "clip_ratio/low_mean": 0.00029222255068361846,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007639104969712207,
      "completions/clipped_ratio": 0.029017857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4033.0,
      "completions/mean_length": 694.708740234375,
      "completions/mean_terminated_length": 593.0609130859375,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 13.970545348498105,
      "grad_norm": 0.1142578125,
      "learning_rate": 1e-06,
      "loss": 0.0054,
      "num_tokens": 870764633.0,
      "reward": 0.4955357313156128,
      "reward_std": 0.18475833535194397,
      "rewards/verify_math_reward/mean": 0.4955357015132904,
      "rewards/verify_math_reward/std": 0.500259280204773,
      "step": 1495
    },
    {
      "clip_ratio/high_max": 0.0019481853469187627,
      "clip_ratio/high_mean": 0.000563488020816294,
      "clip_ratio/low_mean": 0.00029821233408711123,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008617003582003235,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2294.0,
      "completions/mean_length": 565.5670166015625,
      "completions/mean_terminated_length": 517.642578125,
      "completions/min_length": 26.0,
      "completions/min_terminated_length": 26.0,
      "epoch": 13.979877515310585,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0048,
      "num_tokens": 871307429.0,
      "reward": 0.5993303656578064,
      "reward_std": 0.1954641044139862,
      "rewards/verify_math_reward/mean": 0.5993303656578064,
      "rewards/verify_math_reward/std": 0.49030786752700806,
      "step": 1496
    },
    {
      "clip_ratio/high_max": 0.0011723246516339714,
      "clip_ratio/high_mean": 0.00036326770009509346,
      "clip_ratio/low_mean": 0.00035125915496792004,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007145268627937185,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3855.0,
      "completions/mean_length": 714.7020263671875,
      "completions/mean_terminated_length": 629.5892333984375,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 13.989209682123068,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.0037,
      "num_tokens": 871950738.0,
      "reward": 0.4754464626312256,
      "reward_std": 0.2298799455165863,
      "rewards/verify_math_reward/mean": 0.4754464328289032,
      "rewards/verify_math_reward/std": 0.4996756315231323,
      "step": 1497
    },
    {
      "clip_ratio/high_max": 0.00160207767112297,
      "clip_ratio/high_mean": 0.0005525343447061459,
      "clip_ratio/low_mean": 0.0003215502217699395,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008740845696593169,
      "completions/clipped_ratio": 0.017045454545454586,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1851.0,
      "completions/mean_length": 629.821044921875,
      "completions/mean_terminated_length": 569.7138671875,
      "completions/min_length": 184.0,
      "completions/min_terminated_length": 184.0,
      "epoch": 13.998541848935549,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": -0.0063,
      "num_tokens": 872546118.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.22375266253948212,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 1498
    },
    {
      "clip_ratio/high_max": 0.0019307765560370171,
      "clip_ratio/high_mean": 0.0005768025667975962,
      "clip_ratio/low_mean": 0.00034231782956339885,
      "clip_ratio/low_min": 9.4126507974579e-06,
      "clip_ratio/region_mean": 0.0009191203935188241,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3810.0,
      "completions/mean_length": 612.2846069335938,
      "completions/mean_terminated_length": 568.9841918945312,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 14.009332166812483,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0176,
      "num_tokens": 873137581.0,
      "reward": 0.494419664144516,
      "reward_std": 0.21564865112304688,
      "rewards/verify_math_reward/mean": 0.4944196343421936,
      "rewards/verify_math_reward/std": 0.5002480745315552,
      "step": 1499
    },
    {
      "clip_ratio/high_max": 0.0016640719213683042,
      "clip_ratio/high_mean": 0.0005200427185627632,
      "clip_ratio/low_mean": 0.0003144358354347787,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008344785464942106,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3013.0,
      "completions/mean_length": 580.2310791015625,
      "completions/mean_terminated_length": 524.4251708984375,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 14.018664333624963,
      "grad_norm": 0.1416015625,
      "learning_rate": 1e-06,
      "loss": -0.0141,
      "num_tokens": 873695812.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.22702956199645996,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 1500
    },
    {
      "clip_ratio/high_max": 0.0015412053862746689,
      "clip_ratio/high_mean": 0.0004509609387923774,
      "clip_ratio/low_mean": 0.00028261597503842495,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007335769096243894,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2156.0,
      "completions/mean_length": 617.919677734375,
      "completions/mean_terminated_length": 586.5855712890625,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 14.027996500437446,
      "grad_norm": 0.1162109375,
      "learning_rate": 1e-06,
      "loss": 0.0079,
      "num_tokens": 874315228.0,
      "reward": 0.5011160969734192,
      "reward_std": 0.19643978774547577,
      "rewards/verify_math_reward/mean": 0.5011160969734192,
      "rewards/verify_math_reward/std": 0.5002780556678772,
      "step": 1501
    },
    {
      "clip_ratio/high_max": 0.001706761853711214,
      "clip_ratio/high_mean": 0.0005059305342456355,
      "clip_ratio/low_mean": 0.00043664882434768515,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009425793559785234,
      "completions/clipped_ratio": 0.025669642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2793.0,
      "completions/mean_length": 664.1585083007812,
      "completions/mean_terminated_length": 573.7434692382812,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 14.037328667249927,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": -0.0016,
      "num_tokens": 874897234.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.2329639196395874,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 1502
    },
    {
      "clip_ratio/high_max": 0.0017753144729795167,
      "clip_ratio/high_mean": 0.0005143590854004287,
      "clip_ratio/low_mean": 0.00023039474388042436,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007447538259839348,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2837.0,
      "completions/mean_length": 608.5089721679688,
      "completions/mean_terminated_length": 553.1519165039062,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 14.04666083406241,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.0035,
      "num_tokens": 875475194.0,
      "reward": 0.559151828289032,
      "reward_std": 0.20069055259227753,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 1503
    },
    {
      "clip_ratio/high_max": 0.001343896225989738,
      "clip_ratio/high_mean": 0.000393055019003441,
      "clip_ratio/low_mean": 0.00036993329968026956,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007629883161826001,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2906.0,
      "completions/mean_length": 617.0301513671875,
      "completions/mean_terminated_length": 549.7462768554688,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 14.05599300087489,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": -0.0005,
      "num_tokens": 876043581.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.19246003031730652,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 1504
    },
    {
      "clip_ratio/high_max": 0.0017084121882362524,
      "clip_ratio/high_mean": 0.0005181743806588202,
      "clip_ratio/low_mean": 0.00025313037485830137,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007713047525612637,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2929.0,
      "completions/mean_length": 654.3326416015625,
      "completions/mean_terminated_length": 567.7001953125,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 14.065325167687373,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0192,
      "num_tokens": 876627191.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.19340112805366516,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 1505
    },
    {
      "clip_ratio/high_max": 0.0014563648728653789,
      "clip_ratio/high_mean": 0.0003780712620482518,
      "clip_ratio/low_mean": 0.0002658752102888684,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006439464754066648,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4059.0,
      "completions/mean_length": 615.8795166015625,
      "completions/mean_terminated_length": 548.5733642578125,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 14.074657334499854,
      "grad_norm": 0.1142578125,
      "learning_rate": 1e-06,
      "loss": -0.0019,
      "num_tokens": 877204155.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.15139120817184448,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321892857551575,
      "step": 1506
    },
    {
      "clip_ratio/high_max": 0.001548316153275664,
      "clip_ratio/high_mean": 0.0005630202476822888,
      "clip_ratio/low_mean": 0.0003481325754819409,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009111528224821086,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3035.0,
      "completions/mean_length": 663.372802734375,
      "completions/mean_terminated_length": 585.0022583007812,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 14.083989501312336,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": -0.006,
      "num_tokens": 877804793.0,
      "reward": 0.5736607313156128,
      "reward_std": 0.238377645611763,
      "rewards/verify_math_reward/mean": 0.5736607313156128,
      "rewards/verify_math_reward/std": 0.4948205351829529,
      "step": 1507
    },
    {
      "clip_ratio/high_max": 0.0017636547017900739,
      "clip_ratio/high_mean": 0.0005906411392970767,
      "clip_ratio/low_mean": 0.00040212186763710633,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009927630071615567,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3345.0,
      "completions/mean_length": 561.1127319335938,
      "completions/mean_terminated_length": 529.2669067382812,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "epoch": 14.093321668124817,
      "grad_norm": 0.150390625,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 878362686.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.2366524040699005,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49302801489830017,
      "step": 1508
    },
    {
      "clip_ratio/high_max": 0.001944740686667501,
      "clip_ratio/high_mean": 0.0005664587365572515,
      "clip_ratio/low_mean": 0.00035113504850414756,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009175937939289724,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3527.0,
      "completions/mean_length": 631.0390625,
      "completions/mean_terminated_length": 576.0396728515625,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 14.1026538349373,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": 0.0212,
      "num_tokens": 878962185.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.2064424306154251,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 1509
    },
    {
      "clip_ratio/high_max": 0.0017148217593785375,
      "clip_ratio/high_mean": 0.000559840604864803,
      "clip_ratio/low_mean": 0.0002501155640857178,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008099561564449687,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1773.0,
      "completions/mean_length": 596.7522583007812,
      "completions/mean_terminated_length": 545.2344360351562,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 14.11198600174978,
      "grad_norm": 0.1328125,
      "learning_rate": 1e-06,
      "loss": -0.0018,
      "num_tokens": 879525859.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.21237428486347198,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.4794250428676605,
      "step": 1510
    },
    {
      "clip_ratio/high_max": 0.0014186469570631743,
      "clip_ratio/high_mean": 0.000417981873511053,
      "clip_ratio/low_mean": 0.0004800662431989622,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008980481243270333,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3425.0,
      "completions/mean_length": 676.396240234375,
      "completions/mean_terminated_length": 602.311279296875,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 14.121318168562263,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": 0.0105,
      "num_tokens": 880137638.0,
      "reward": 0.527901828289032,
      "reward_std": 0.22485104203224182,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 1511
    },
    {
      "clip_ratio/high_max": 0.0020417628329596482,
      "clip_ratio/high_mean": 0.0006389721718278452,
      "clip_ratio/low_mean": 0.0003225239290713944,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000961496096351766,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3525.0,
      "completions/mean_length": 563.3214721679688,
      "completions/mean_terminated_length": 523.44921875,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "epoch": 14.130650335374744,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0049,
      "num_tokens": 880681734.0,
      "reward": 0.606026828289032,
      "reward_std": 0.21507565677165985,
      "rewards/verify_math_reward/mean": 0.6060267686843872,
      "rewards/verify_math_reward/std": 0.48890194296836853,
      "step": 1512
    },
    {
      "clip_ratio/high_max": 0.0017885157867567614,
      "clip_ratio/high_mean": 0.0005276078309179866,
      "clip_ratio/low_mean": 0.0003105719601990131,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008381797870242735,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3106.0,
      "completions/mean_length": 590.6138916015625,
      "completions/mean_terminated_length": 534.9727783203125,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "epoch": 14.139982502187227,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0102,
      "num_tokens": 881247428.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.179951474070549,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.49978047609329224,
      "step": 1513
    },
    {
      "clip_ratio/high_max": 0.0013858373376933741,
      "clip_ratio/high_mean": 0.0003639224679545805,
      "clip_ratio/low_mean": 0.0002737126949341473,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000637635162775041,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3457.0,
      "completions/mean_length": 661.2533569335938,
      "completions/mean_terminated_length": 594.8247680664062,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 14.149314668999708,
      "grad_norm": 0.11279296875,
      "learning_rate": 1e-06,
      "loss": -0.0007,
      "num_tokens": 881864447.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.15703065693378448,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 1514
    },
    {
      "clip_ratio/high_max": 0.001705503991615842,
      "clip_ratio/high_mean": 0.0005129117788555959,
      "clip_ratio/low_mean": 0.00037262371779434034,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008855355026753386,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3994.0,
      "completions/mean_length": 697.1250610351562,
      "completions/mean_terminated_length": 611.5697631835938,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 14.15864683581219,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.0098,
      "num_tokens": 882489399.0,
      "reward": 0.515625,
      "reward_std": 0.2118447721004486,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 1515
    },
    {
      "clip_ratio/high_max": 0.0014235807529985323,
      "clip_ratio/high_mean": 0.00043980203270166385,
      "clip_ratio/low_mean": 0.00032022949631027586,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007600315229865373,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3874.0,
      "completions/mean_length": 579.6495971679688,
      "completions/mean_terminated_length": 543.9706420898438,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 14.167979002624673,
      "grad_norm": 0.115234375,
      "learning_rate": 1e-06,
      "loss": 0.0084,
      "num_tokens": 883061165.0,
      "reward": 0.6305803656578064,
      "reward_std": 0.17577557265758514,
      "rewards/verify_math_reward/mean": 0.6305803656578064,
      "rewards/verify_math_reward/std": 0.48291724920272827,
      "step": 1516
    },
    {
      "clip_ratio/high_max": 0.001512436803750461,
      "clip_ratio/high_mean": 0.0004945711592654334,
      "clip_ratio/low_mean": 0.00040261408162223233,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008971852421382209,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2490.0,
      "completions/mean_length": 557.388427734375,
      "completions/mean_terminated_length": 525.509033203125,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 14.177311169437154,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": 0.005,
      "num_tokens": 883616009.0,
      "reward": 0.5647321939468384,
      "reward_std": 0.24862739443778992,
      "rewards/verify_math_reward/mean": 0.5647321343421936,
      "rewards/verify_math_reward/std": 0.49606895446777344,
      "step": 1517
    },
    {
      "clip_ratio/high_max": 0.0017349476565868827,
      "clip_ratio/high_mean": 0.00048150656016332505,
      "clip_ratio/low_mean": 0.0003248850423460681,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008063916006904037,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2785.0,
      "completions/mean_length": 644.3928833007812,
      "completions/mean_terminated_length": 589.60546875,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 14.186643336249636,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": -0.0009,
      "num_tokens": 884221481.0,
      "reward": 0.5636160969734192,
      "reward_std": 0.21388381719589233,
      "rewards/verify_math_reward/mean": 0.5636160969734192,
      "rewards/verify_math_reward/std": 0.49621346592903137,
      "step": 1518
    },
    {
      "clip_ratio/high_max": 0.0019056037363043288,
      "clip_ratio/high_mean": 0.0006080793355067726,
      "clip_ratio/low_mean": 0.00029451267869262665,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009025920026033418,
      "completions/clipped_ratio": 0.0323660714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3892.0,
      "completions/mean_length": 676.5111694335938,
      "completions/mean_terminated_length": 562.1337890625,
      "completions/min_length": 91.0,
      "completions/min_terminated_length": 91.0,
      "epoch": 14.195975503062117,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": -0.0178,
      "num_tokens": 884801843.0,
      "reward": 0.559151828289032,
      "reward_std": 0.20891445875167847,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 1519
    },
    {
      "clip_ratio/high_max": 0.0019129214106214931,
      "clip_ratio/high_mean": 0.000607894981840218,
      "clip_ratio/low_mean": 0.00033826994956598355,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009461649306103936,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3500.0,
      "completions/mean_length": 575.0569458007812,
      "completions/mean_terminated_length": 535.317138671875,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 14.2053076698746,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0034,
      "num_tokens": 885365446.0,
      "reward": 0.5625,
      "reward_std": 0.24115532636642456,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49635544419288635,
      "step": 1520
    },
    {
      "clip_ratio/high_max": 0.0014671868520963471,
      "clip_ratio/high_mean": 0.000437759687201833,
      "clip_ratio/low_mean": 0.0003206332690979252,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007583929705106129,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2606.0,
      "completions/mean_length": 607.2020263671875,
      "completions/mean_terminated_length": 547.8013916015625,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 14.21463983668708,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0074,
      "num_tokens": 885933395.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.1994110643863678,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689781665802,
      "step": 1521
    },
    {
      "clip_ratio/high_max": 0.0015295231569325551,
      "clip_ratio/high_mean": 0.00046846458701566007,
      "clip_ratio/low_mean": 0.00036087475041313155,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000829339332995005,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2890.0,
      "completions/mean_length": 636.3002319335938,
      "completions/mean_terminated_length": 585.3646240234375,
      "completions/min_length": 83.0,
      "completions/min_terminated_length": 83.0,
      "epoch": 14.223972003499563,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": 0.0199,
      "num_tokens": 886531808.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.22315604984760284,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49866142868995667,
      "step": 1522
    },
    {
      "clip_ratio/high_max": 0.0016179913473024499,
      "clip_ratio/high_mean": 0.00044897663519805064,
      "clip_ratio/low_mean": 0.0004330608826421667,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008820375146569859,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3295.0,
      "completions/mean_length": 633.3002319335938,
      "completions/mean_terminated_length": 594.2178344726562,
      "completions/min_length": 36.0,
      "completions/min_terminated_length": 36.0,
      "epoch": 14.233304170312044,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0078,
      "num_tokens": 887146581.0,
      "reward": 0.527901828289032,
      "reward_std": 0.2144811451435089,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 1523
    },
    {
      "clip_ratio/high_max": 0.0014640017852798337,
      "clip_ratio/high_mean": 0.0003699388769291545,
      "clip_ratio/low_mean": 0.0003018170318682678,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006717559081153013,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3755.0,
      "completions/mean_length": 626.224365234375,
      "completions/mean_terminated_length": 583.09716796875,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 14.242636337124527,
      "grad_norm": 0.11328125,
      "learning_rate": 1e-06,
      "loss": 0.0039,
      "num_tokens": 887751078.0,
      "reward": 0.5546875,
      "reward_std": 0.1699160635471344,
      "rewards/verify_math_reward/mean": 0.5546875,
      "rewards/verify_math_reward/std": 0.4972778558731079,
      "step": 1524
    },
    {
      "clip_ratio/high_max": 0.0020246311214577872,
      "clip_ratio/high_mean": 0.0006806456217418599,
      "clip_ratio/low_mean": 0.00035072735158792057,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.001031372984925838,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3654.0,
      "completions/mean_length": 604.34375,
      "completions/mean_terminated_length": 564.9345703125,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 14.251968503937007,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0067,
      "num_tokens": 888352490.0,
      "reward": 0.59375,
      "reward_std": 0.24180610477924347,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 1525
    },
    {
      "clip_ratio/high_max": 0.0014662166013295064,
      "clip_ratio/high_mean": 0.0004219607668574099,
      "clip_ratio/low_mean": 0.0004305838472191681,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008525446137355175,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3986.0,
      "completions/mean_length": 604.0703125,
      "completions/mean_terminated_length": 552.6602172851562,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 14.26130067074949,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.008,
      "num_tokens": 888930729.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.21996724605560303,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 1526
    },
    {
      "clip_ratio/high_max": 0.0015160213315539295,
      "clip_ratio/high_mean": 0.0004319961130931915,
      "clip_ratio/low_mean": 0.00036158949774289795,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007935856101539684,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3891.0,
      "completions/mean_length": 614.4475708007812,
      "completions/mean_terminated_length": 571.1740112304688,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 14.27063283756197,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0038,
      "num_tokens": 889534034.0,
      "reward": 0.520089328289032,
      "reward_std": 0.21192918717861176,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 1527
    },
    {
      "clip_ratio/high_max": 0.001307474672103126,
      "clip_ratio/high_mean": 0.0003457803753690314,
      "clip_ratio/low_mean": 0.000377805635025652,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007235860175569542,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2879.0,
      "completions/mean_length": 714.0904541015625,
      "completions/mean_terminated_length": 604.9965209960938,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 14.279965004374453,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": -0.0014,
      "num_tokens": 890151883.0,
      "reward": 0.486607164144516,
      "reward_std": 0.20978349447250366,
      "rewards/verify_math_reward/mean": 0.4866071343421936,
      "rewards/verify_math_reward/std": 0.500099778175354,
      "step": 1528
    },
    {
      "clip_ratio/high_max": 0.0016604755001026206,
      "clip_ratio/high_mean": 0.0005210502342833934,
      "clip_ratio/low_mean": 0.0003724878737330073,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008935381174524082,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3635.0,
      "completions/mean_length": 674.6908569335938,
      "completions/mean_terminated_length": 604.5501098632812,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 14.289297171186934,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0011,
      "num_tokens": 890763398.0,
      "reward": 0.4977678656578064,
      "reward_std": 0.23904915153980255,
      "rewards/verify_math_reward/mean": 0.4977678656578064,
      "rewards/verify_math_reward/std": 0.5002742409706116,
      "step": 1529
    },
    {
      "clip_ratio/high_max": 0.0019644675176095916,
      "clip_ratio/high_mean": 0.0006172429698381166,
      "clip_ratio/low_mean": 0.0003118324134447903,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009290753714594757,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2008.0,
      "completions/mean_length": 565.693115234375,
      "completions/mean_terminated_length": 525.84765625,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 14.298629337999417,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0029,
      "num_tokens": 891301867.0,
      "reward": 0.6640625,
      "reward_std": 0.224068284034729,
      "rewards/verify_math_reward/mean": 0.6640625,
      "rewards/verify_math_reward/std": 0.4725809693336487,
      "step": 1530
    },
    {
      "clip_ratio/high_max": 0.0016890158858586801,
      "clip_ratio/high_mean": 0.0005197786613280186,
      "clip_ratio/low_mean": 0.0003198698020696611,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008396484690820216,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4091.0,
      "completions/mean_length": 623.0580444335938,
      "completions/mean_terminated_length": 567.9320068359375,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 14.307961504811898,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": 0.0037,
      "num_tokens": 891890287.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.20061568915843964,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49689781665802,
      "step": 1531
    },
    {
      "clip_ratio/high_max": 0.0018087048829329433,
      "clip_ratio/high_mean": 0.0005703125470972736,
      "clip_ratio/low_mean": 0.0003972547054900133,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009675672645244049,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3859.0,
      "completions/mean_length": 586.3449096679688,
      "completions/mean_terminated_length": 558.7098388671875,
      "completions/min_length": 55.0,
      "completions/min_terminated_length": 55.0,
      "epoch": 14.31729367162438,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0187,
      "num_tokens": 892480324.0,
      "reward": 0.520089328289032,
      "reward_std": 0.218841090798378,
      "rewards/verify_math_reward/mean": 0.5200892686843872,
      "rewards/verify_math_reward/std": 0.4998753070831299,
      "step": 1532
    },
    {
      "clip_ratio/high_max": 0.001723562831102754,
      "clip_ratio/high_mean": 0.000552482175521618,
      "clip_ratio/low_mean": 0.00030707415839970054,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008595563467679312,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3111.0,
      "completions/mean_length": 598.7779541015625,
      "completions/mean_terminated_length": 539.2338256835938,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 14.326625838436861,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0111,
      "num_tokens": 893035453.0,
      "reward": 0.574776828289032,
      "reward_std": 0.21271267533302307,
      "rewards/verify_math_reward/mean": 0.5747767686843872,
      "rewards/verify_math_reward/std": 0.49465295672416687,
      "step": 1533
    },
    {
      "clip_ratio/high_max": 0.0015161935980358976,
      "clip_ratio/high_mean": 0.000461618922145135,
      "clip_ratio/low_mean": 0.00037800682150646026,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008396257480853819,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3858.0,
      "completions/mean_length": 694.4520263671875,
      "completions/mean_terminated_length": 600.8314208984375,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 14.335958005249344,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.001,
      "num_tokens": 893647722.0,
      "reward": 0.5167410969734192,
      "reward_std": 0.20857815444469452,
      "rewards/verify_math_reward/mean": 0.5167410969734192,
      "rewards/verify_math_reward/std": 0.4999987483024597,
      "step": 1534
    },
    {
      "clip_ratio/high_max": 0.0014743680394531111,
      "clip_ratio/high_mean": 0.0004271749584177087,
      "clip_ratio/low_mean": 0.00032111649215949,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007482914534193696,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3724.0,
      "completions/mean_length": 653.700927734375,
      "completions/mean_terminated_length": 571.085693359375,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 14.345290172061826,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": -0.0067,
      "num_tokens": 894238542.0,
      "reward": 0.5424107313156128,
      "reward_std": 0.20249292254447937,
      "rewards/verify_math_reward/mean": 0.5424107313156128,
      "rewards/verify_math_reward/std": 0.4984763562679291,
      "step": 1535
    },
    {
      "clip_ratio/high_max": 0.001402763675287133,
      "clip_ratio/high_mean": 0.00044309864460956305,
      "clip_ratio/low_mean": 0.00038792643181295716,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008310250668728258,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3323.0,
      "completions/mean_length": 640.7433471679688,
      "completions/mean_terminated_length": 573.9180908203125,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 14.354622338874307,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": 0.0083,
      "num_tokens": 894825952.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.21447864174842834,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 1536
    },
    {
      "clip_ratio/high_max": 0.0017578473380126525,
      "clip_ratio/high_mean": 0.0005143431053511449,
      "clip_ratio/low_mean": 0.00043021276769650285,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009445558580409852,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3419.0,
      "completions/mean_length": 627.75,
      "completions/mean_terminated_length": 584.641845703125,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 14.36395450568679,
      "grad_norm": 0.123046875,
      "learning_rate": 1e-06,
      "loss": -0.003,
      "num_tokens": 895442592.0,
      "reward": 0.4899553656578064,
      "reward_std": 0.22188794612884521,
      "rewards/verify_math_reward/mean": 0.4899553656578064,
      "rewards/verify_math_reward/std": 0.5001782774925232,
      "step": 1537
    },
    {
      "clip_ratio/high_max": 0.0017130379001173424,
      "clip_ratio/high_mean": 0.000581580497964751,
      "clip_ratio/low_mean": 0.00029908007115864166,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008806605637801113,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3451.0,
      "completions/mean_length": 632.888427734375,
      "completions/mean_terminated_length": 569.9227294921875,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 14.37328667249927,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0136,
      "num_tokens": 896024572.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.21320053935050964,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 1538
    },
    {
      "clip_ratio/high_max": 0.0015094607515493408,
      "clip_ratio/high_mean": 0.00046719284432583663,
      "clip_ratio/low_mean": 0.0003417732586967759,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008089661032499862,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3947.0,
      "completions/mean_length": 604.482177734375,
      "completions/mean_terminated_length": 573.0270385742188,
      "completions/min_length": 86.0,
      "completions/min_terminated_length": 86.0,
      "epoch": 14.382618839311753,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0192,
      "num_tokens": 896617996.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.208957239985466,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4982811510562897,
      "step": 1539
    },
    {
      "clip_ratio/high_max": 0.0017951807185454527,
      "clip_ratio/high_mean": 0.0005923280787101248,
      "clip_ratio/low_mean": 0.0004002520242920582,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000992580105048546,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2145.0,
      "completions/mean_length": 544.2745971679688,
      "completions/mean_terminated_length": 496.06109619140625,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 14.391951006124234,
      "grad_norm": 0.140625,
      "learning_rate": 1e-06,
      "loss": 0.0074,
      "num_tokens": 897139114.0,
      "reward": 0.5859375,
      "reward_std": 0.2293960303068161,
      "rewards/verify_math_reward/mean": 0.5859375,
      "rewards/verify_math_reward/std": 0.4928344786167145,
      "step": 1540
    },
    {
      "clip_ratio/high_max": 0.0014198419776221272,
      "clip_ratio/high_mean": 0.00038499406605296826,
      "clip_ratio/low_mean": 0.0003924798751313574,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007774739442538703,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2815.0,
      "completions/mean_length": 694.1395263671875,
      "completions/mean_terminated_length": 608.5091552734375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 14.401283172936717,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": -0.0035,
      "num_tokens": 897765903.0,
      "reward": 0.527901828289032,
      "reward_std": 0.20493358373641968,
      "rewards/verify_math_reward/mean": 0.5279017686843872,
      "rewards/verify_math_reward/std": 0.49949970841407776,
      "step": 1541
    },
    {
      "clip_ratio/high_max": 0.0017295615270995768,
      "clip_ratio/high_mean": 0.0005449113423310337,
      "clip_ratio/low_mean": 0.0003918287829947076,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009367401244162465,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3664.0,
      "completions/mean_length": 595.9631958007812,
      "completions/mean_terminated_length": 568.4038696289062,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 14.410615339749198,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0224,
      "num_tokens": 898351894.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.24423283338546753,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 1542
    },
    {
      "clip_ratio/high_max": 0.001838911666709464,
      "clip_ratio/high_mean": 0.0005933181932959997,
      "clip_ratio/low_mean": 0.00037655709672890225,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009698753055999987,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3924.0,
      "completions/mean_length": 631.5647583007812,
      "completions/mean_terminated_length": 584.5362548828125,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "epoch": 14.41994750656168,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0013,
      "num_tokens": 898950288.0,
      "reward": 0.59375,
      "reward_std": 0.21470825374126434,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4914066195487976,
      "step": 1543
    },
    {
      "clip_ratio/high_max": 0.0012954257008459535,
      "clip_ratio/high_mean": 0.00034241293542436324,
      "clip_ratio/low_mean": 0.0003398050796477037,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006822180166636826,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3070.0,
      "completions/mean_length": 617.8616333007812,
      "completions/mean_terminated_length": 570.6470947265625,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 14.429279673374161,
      "grad_norm": 0.1171875,
      "learning_rate": 1e-06,
      "loss": 0.0068,
      "num_tokens": 899556868.0,
      "reward": 0.551339328289032,
      "reward_std": 0.1962871253490448,
      "rewards/verify_math_reward/mean": 0.5513392686843872,
      "rewards/verify_math_reward/std": 0.4976350665092468,
      "step": 1544
    },
    {
      "clip_ratio/high_max": 0.0013910240759287262,
      "clip_ratio/high_mean": 0.000438392488831596,
      "clip_ratio/low_mean": 0.0003174454025156592,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007558378993053338,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3826.0,
      "completions/mean_length": 675.0267944335938,
      "completions/mean_terminated_length": 564.6727905273438,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 14.438611840186644,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 900136068.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.21297159790992737,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756911277771,
      "step": 1545
    },
    {
      "clip_ratio/high_max": 0.0017639706875343109,
      "clip_ratio/high_mean": 0.0005594972026301548,
      "clip_ratio/low_mean": 0.00042790657562363776,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.00098740377870854,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2865.0,
      "completions/mean_length": 656.6585083007812,
      "completions/mean_terminated_length": 574.1142578125,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 14.447944006999125,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0032,
      "num_tokens": 900721298.0,
      "reward": 0.515625,
      "reward_std": 0.22413566708564758,
      "rewards/verify_math_reward/mean": 0.515625,
      "rewards/verify_math_reward/std": 0.5000349283218384,
      "step": 1546
    },
    {
      "clip_ratio/high_max": 0.001680259183558519,
      "clip_ratio/high_mean": 0.0004201324322821165,
      "clip_ratio/low_mean": 0.00038109770162009227,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008012301468625083,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2832.0,
      "completions/mean_length": 589.318115234375,
      "completions/mean_terminated_length": 541.716064453125,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 14.457276173811607,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0075,
      "num_tokens": 901294247.0,
      "reward": 0.5379464626312256,
      "reward_std": 0.17548204958438873,
      "rewards/verify_math_reward/mean": 0.5379464030265808,
      "rewards/verify_math_reward/std": 0.4988364577293396,
      "step": 1547
    },
    {
      "clip_ratio/high_max": 0.0017964202270377427,
      "clip_ratio/high_mean": 0.0005530574530894228,
      "clip_ratio/low_mean": 0.00026572718252282357,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000818784626062552,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2608.0,
      "completions/mean_length": 622.359375,
      "completions/mean_terminated_length": 571.2185668945312,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 14.466608340624088,
      "grad_norm": 0.11328125,
      "learning_rate": 1e-06,
      "loss": 0.005,
      "num_tokens": 901901089.0,
      "reward": 0.5245535969734192,
      "reward_std": 0.19384829699993134,
      "rewards/verify_math_reward/mean": 0.5245535969734192,
      "rewards/verify_math_reward/std": 0.4996756613254547,
      "step": 1548
    },
    {
      "clip_ratio/high_max": 0.0016368790902561159,
      "clip_ratio/high_mean": 0.0005065319853656547,
      "clip_ratio/low_mean": 0.0003591351855902758,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008656671734570409,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3413.0,
      "completions/mean_length": 638.8303833007812,
      "completions/mean_terminated_length": 559.8995361328125,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 14.47594050743657,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": -0.0033,
      "num_tokens": 902472841.0,
      "reward": 0.5479910969734192,
      "reward_std": 0.2291657030582428,
      "rewards/verify_math_reward/mean": 0.5479910969734192,
      "rewards/verify_math_reward/std": 0.49796950817108154,
      "step": 1549
    },
    {
      "clip_ratio/high_max": 0.0016680218705005245,
      "clip_ratio/high_mean": 0.0004951860539677,
      "clip_ratio/low_mean": 0.00037240606957311684,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008675921235408168,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4031.0,
      "completions/mean_length": 632.2388916015625,
      "completions/mean_terminated_length": 557.197265625,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "epoch": 14.485272674249051,
      "grad_norm": 0.12451171875,
      "learning_rate": 1e-06,
      "loss": 0.0002,
      "num_tokens": 903060583.0,
      "reward": 0.53125,
      "reward_std": 0.21188825368881226,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 1550
    },
    {
      "clip_ratio/high_max": 0.0013455009857352707,
      "clip_ratio/high_mean": 0.00035143964259987115,
      "clip_ratio/low_mean": 0.0003274626013762827,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006789022273778755,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3216.0,
      "completions/mean_length": 586.0078125,
      "completions/mean_terminated_length": 550.3934326171875,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 14.494604841061534,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.009,
      "num_tokens": 903633926.0,
      "reward": 0.5915178656578064,
      "reward_std": 0.17983242869377136,
      "rewards/verify_math_reward/mean": 0.5915178656578064,
      "rewards/verify_math_reward/std": 0.49182769656181335,
      "step": 1551
    },
    {
      "clip_ratio/high_max": 0.0012904408258691547,
      "clip_ratio/high_mean": 0.00040748293201886554,
      "clip_ratio/low_mean": 0.00039662205495005765,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008041049754865526,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2931.0,
      "completions/mean_length": 601.3795166015625,
      "completions/mean_terminated_length": 557.9435424804688,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 14.503937007874015,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": 0.0113,
      "num_tokens": 904221722.0,
      "reward": 0.5412946939468384,
      "reward_std": 0.19283728301525116,
      "rewards/verify_math_reward/mean": 0.5412946343421936,
      "rewards/verify_math_reward/std": 0.49857014417648315,
      "step": 1552
    },
    {
      "clip_ratio/high_max": 0.0016895394037419464,
      "clip_ratio/high_mean": 0.0005341190271792584,
      "clip_ratio/low_mean": 0.0002519272152312624,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007860462460484996,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2706.0,
      "completions/mean_length": 631.677490234375,
      "completions/mean_terminated_length": 560.6549072265625,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 14.513269174686497,
      "grad_norm": 0.1201171875,
      "learning_rate": 1e-06,
      "loss": -0.0067,
      "num_tokens": 904802641.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.1949409544467926,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 1553
    },
    {
      "clip_ratio/high_max": 0.0016963273910732823,
      "clip_ratio/high_mean": 0.0005249796301995957,
      "clip_ratio/low_mean": 0.000345862801509611,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008708424311407725,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2056.0,
      "completions/mean_length": 637.279052734375,
      "completions/mean_terminated_length": 562.3466186523438,
      "completions/min_length": 60.0,
      "completions/min_terminated_length": 60.0,
      "epoch": 14.52260134149898,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": -0.0071,
      "num_tokens": 905386283.0,
      "reward": 0.5290178656578064,
      "reward_std": 0.22007529437541962,
      "rewards/verify_math_reward/mean": 0.5290178656578064,
      "rewards/verify_math_reward/std": 0.49943602085113525,
      "step": 1554
    },
    {
      "clip_ratio/high_max": 0.0011884248078786186,
      "clip_ratio/high_mean": 0.00032524324228688783,
      "clip_ratio/low_mean": 0.0002867367578573976,
      "clip_ratio/low_min": 1.1015156815119553e-05,
      "clip_ratio/region_mean": 0.0006119799986663566,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2368.0,
      "completions/mean_length": 638.232177734375,
      "completions/mean_terminated_length": 591.2941284179688,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 14.531933508311461,
      "grad_norm": 0.11083984375,
      "learning_rate": 1e-06,
      "loss": 0.0141,
      "num_tokens": 906007515.0,
      "reward": 0.5558035969734192,
      "reward_std": 0.17101122438907623,
      "rewards/verify_math_reward/mean": 0.5558035969734192,
      "rewards/verify_math_reward/std": 0.49715372920036316,
      "step": 1555
    },
    {
      "clip_ratio/high_max": 0.0018121606326531037,
      "clip_ratio/high_mean": 0.0005071863884040795,
      "clip_ratio/low_mean": 0.00033820467319856107,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008453910540993093,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3292.0,
      "completions/mean_length": 659.630615234375,
      "completions/mean_terminated_length": 597.151123046875,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 14.541265675123944,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.0269,
      "num_tokens": 906628352.0,
      "reward": 0.5212053656578064,
      "reward_std": 0.20688749849796295,
      "rewards/verify_math_reward/mean": 0.5212053656578064,
      "rewards/verify_math_reward/std": 0.49982914328575134,
      "step": 1556
    },
    {
      "clip_ratio/high_max": 0.001812286400308949,
      "clip_ratio/high_mean": 0.0005917690667729403,
      "clip_ratio/low_mean": 0.00045150386313252966,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010432729268359253,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2107.0,
      "completions/mean_length": 606.5346069335938,
      "completions/mean_terminated_length": 539.0477905273438,
      "completions/min_length": 94.0,
      "completions/min_terminated_length": 94.0,
      "epoch": 14.550597841936424,
      "grad_norm": 0.1552734375,
      "learning_rate": 1e-06,
      "loss": 0.0162,
      "num_tokens": 907188623.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.25025638937950134,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49514803290367126,
      "step": 1557
    },
    {
      "clip_ratio/high_max": 0.0014588254935006262,
      "clip_ratio/high_mean": 0.00040958315935313294,
      "clip_ratio/low_mean": 0.00036816072054079996,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007777438922857982,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3046.0,
      "completions/mean_length": 562.5580444335938,
      "completions/mean_terminated_length": 514.5927734375,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 14.559930008748907,
      "grad_norm": 0.12353515625,
      "learning_rate": 1e-06,
      "loss": 0.0026,
      "num_tokens": 907722931.0,
      "reward": 0.6171875,
      "reward_std": 0.18882180750370026,
      "rewards/verify_math_reward/mean": 0.6171875,
      "rewards/verify_math_reward/std": 0.4863446056842804,
      "step": 1558
    },
    {
      "clip_ratio/high_max": 0.0015287041123883682,
      "clip_ratio/high_mean": 0.00048510482167785085,
      "clip_ratio/low_mean": 0.00028622887361962057,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007713337035966106,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3203.0,
      "completions/mean_length": 600.9955444335938,
      "completions/mean_terminated_length": 525.2770385742188,
      "completions/min_length": 29.0,
      "completions/min_terminated_length": 29.0,
      "epoch": 14.569262175561388,
      "grad_norm": 0.1318359375,
      "learning_rate": 1e-06,
      "loss": 0.0093,
      "num_tokens": 908273959.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.19741688668727875,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.49702703952789307,
      "step": 1559
    },
    {
      "clip_ratio/high_max": 0.001763489262884832,
      "clip_ratio/high_mean": 0.0005315247715316218,
      "clip_ratio/low_mean": 0.00033591098451779544,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008674357559357304,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3159.0,
      "completions/mean_length": 652.1194458007812,
      "completions/mean_terminated_length": 605.3699340820312,
      "completions/min_length": 42.0,
      "completions/min_terminated_length": 42.0,
      "epoch": 14.57859434237387,
      "grad_norm": 0.11865234375,
      "learning_rate": 1e-06,
      "loss": 0.0077,
      "num_tokens": 908897274.0,
      "reward": 0.5457589626312256,
      "reward_std": 0.22405827045440674,
      "rewards/verify_math_reward/mean": 0.5457589030265808,
      "rewards/verify_math_reward/std": 0.4981797933578491,
      "step": 1560
    },
    {
      "clip_ratio/high_max": 0.0016889206062842277,
      "clip_ratio/high_mean": 0.0005480726462110397,
      "clip_ratio/low_mean": 0.0002917422873451869,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000839814947994455,
      "completions/clipped_ratio": 0.024553571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3376.0,
      "completions/mean_length": 605.4933471679688,
      "completions/mean_terminated_length": 517.6315307617188,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 14.587926509186351,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": -0.0163,
      "num_tokens": 909437004.0,
      "reward": 0.5837053656578064,
      "reward_std": 0.1840072125196457,
      "rewards/verify_math_reward/mean": 0.5837053656578064,
      "rewards/verify_math_reward/std": 0.49321895837783813,
      "step": 1561
    },
    {
      "clip_ratio/high_max": 0.0015501676207350101,
      "clip_ratio/high_mean": 0.00047069335960259195,
      "clip_ratio/low_mean": 0.00039319797213011043,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008638913323011366,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2071.0,
      "completions/mean_length": 582.390625,
      "completions/mean_terminated_length": 534.694580078125,
      "completions/min_length": 100.0,
      "completions/min_terminated_length": 100.0,
      "epoch": 14.597258675998834,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0042,
      "num_tokens": 910003650.0,
      "reward": 0.5457589626312256,
      "reward_std": 0.21857966482639313,
      "rewards/verify_math_reward/mean": 0.5457589030265808,
      "rewards/verify_math_reward/std": 0.4981797933578491,
      "step": 1562
    },
    {
      "clip_ratio/high_max": 0.0018689792268560268,
      "clip_ratio/high_mean": 0.0005871383873454761,
      "clip_ratio/low_mean": 0.0003853880889437278,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009725264917506138,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2656.0,
      "completions/mean_length": 592.1395263671875,
      "completions/mean_terminated_length": 540.5537719726562,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 14.606590842811315,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": -0.0047,
      "num_tokens": 910566279.0,
      "reward": 0.5658482313156128,
      "reward_std": 0.23889870941638947,
      "rewards/verify_math_reward/mean": 0.5658482313156128,
      "rewards/verify_math_reward/std": 0.49592188000679016,
      "step": 1563
    },
    {
      "clip_ratio/high_max": 0.0018714175803324906,
      "clip_ratio/high_mean": 0.0006118261198935215,
      "clip_ratio/low_mean": 0.00037246720228267804,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000984293320470897,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3159.0,
      "completions/mean_length": 636.6484375,
      "completions/mean_terminated_length": 565.7278442382812,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 14.615923009623797,
      "grad_norm": 0.1357421875,
      "learning_rate": 1e-06,
      "loss": 0.0031,
      "num_tokens": 911152444.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.23022131621837616,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 1564
    },
    {
      "clip_ratio/high_max": 0.0018047129633487202,
      "clip_ratio/high_mean": 0.0005651459600812814,
      "clip_ratio/low_mean": 0.000288212067061977,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008533580221410375,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3039.0,
      "completions/mean_length": 593.2053833007812,
      "completions/mean_terminated_length": 545.6561279296875,
      "completions/min_length": 77.0,
      "completions/min_terminated_length": 77.0,
      "epoch": 14.625255176436278,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0007,
      "num_tokens": 911723364.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.20665885508060455,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 1565
    },
    {
      "clip_ratio/high_max": 0.0013756183079749462,
      "clip_ratio/high_mean": 0.00042291028353247384,
      "clip_ratio/low_mean": 0.00026492827737456537,
      "clip_ratio/low_min": 1.036484263750026e-05,
      "clip_ratio/region_mean": 0.000687838563862897,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3333.0,
      "completions/mean_length": 600.4877319335938,
      "completions/mean_terminated_length": 557.0407104492188,
      "completions/min_length": 80.0,
      "completions/min_terminated_length": 80.0,
      "epoch": 14.63458734324876,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0058,
      "num_tokens": 912301193.0,
      "reward": 0.5703125,
      "reward_std": 0.200128972530365,
      "rewards/verify_math_reward/mean": 0.5703125,
      "rewards/verify_math_reward/std": 0.49530795216560364,
      "step": 1566
    },
    {
      "clip_ratio/high_max": 0.001985565988434246,
      "clip_ratio/high_mean": 0.0006315414407254138,
      "clip_ratio/low_mean": 0.00035548601363188936,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009870274561762926,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2446.0,
      "completions/mean_length": 560.1663208007812,
      "completions/mean_terminated_length": 512.1685791015625,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 14.643919510061242,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0005,
      "num_tokens": 912850022.0,
      "reward": 0.5602678656578064,
      "reward_std": 0.24596740305423737,
      "rewards/verify_math_reward/mean": 0.5602678656578064,
      "rewards/verify_math_reward/std": 0.4966317415237427,
      "step": 1567
    },
    {
      "clip_ratio/high_max": 0.0019799693473032676,
      "clip_ratio/high_mean": 0.0005882592213311,
      "clip_ratio/low_mean": 0.0004541148209682433,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010423740386613645,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3979.0,
      "completions/mean_length": 632.4910888671875,
      "completions/mean_terminated_length": 589.4418334960938,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 14.653251676873724,
      "grad_norm": 0.1396484375,
      "learning_rate": 1e-06,
      "loss": 0.0085,
      "num_tokens": 913454286.0,
      "reward": 0.5569196939468384,
      "reward_std": 0.23180390894412994,
      "rewards/verify_math_reward/mean": 0.5569196343421936,
      "rewards/verify_math_reward/std": 0.4970270097255707,
      "step": 1568
    },
    {
      "clip_ratio/high_max": 0.001248057238626643,
      "clip_ratio/high_mean": 0.0003339070048014037,
      "clip_ratio/low_mean": 0.00030964711925207666,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006435541272367118,
      "completions/clipped_ratio": 0.021205357142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4092.0,
      "completions/mean_length": 661.3092041015625,
      "completions/mean_terminated_length": 586.8973388671875,
      "completions/min_length": 90.0,
      "completions/min_terminated_length": 90.0,
      "epoch": 14.662583843686207,
      "grad_norm": 0.11328125,
      "learning_rate": 1e-06,
      "loss": -0.0097,
      "num_tokens": 914069971.0,
      "reward": 0.5234375,
      "reward_std": 0.19058139622211456,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 1569
    },
    {
      "clip_ratio/high_max": 0.0012808902492906782,
      "clip_ratio/high_mean": 0.0003640243446625391,
      "clip_ratio/low_mean": 0.0003344646167988685,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006984889596424182,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3824.0,
      "completions/mean_length": 608.2756958007812,
      "completions/mean_terminated_length": 568.9108276367188,
      "completions/min_length": 78.0,
      "completions/min_terminated_length": 78.0,
      "epoch": 14.671916010498688,
      "grad_norm": 0.115234375,
      "learning_rate": 1e-06,
      "loss": 0.0115,
      "num_tokens": 914661106.0,
      "reward": 0.5457589626312256,
      "reward_std": 0.18160048127174377,
      "rewards/verify_math_reward/mean": 0.5457589030265808,
      "rewards/verify_math_reward/std": 0.4981797933578491,
      "step": 1570
    },
    {
      "clip_ratio/high_max": 0.0016296030889861868,
      "clip_ratio/high_mean": 0.0005215273788508057,
      "clip_ratio/low_mean": 0.00042272938185305975,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000944256765251339,
      "completions/clipped_ratio": 0.0200892857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4077.0,
      "completions/mean_length": 631.9330444335938,
      "completions/mean_terminated_length": 560.9157104492188,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 14.68124817731117,
      "grad_norm": 0.1337890625,
      "learning_rate": 1e-06,
      "loss": 0.0078,
      "num_tokens": 915245878.0,
      "reward": 0.5301339626312256,
      "reward_std": 0.2310122549533844,
      "rewards/verify_math_reward/mean": 0.5301339030265808,
      "rewards/verify_math_reward/std": 0.49936988949775696,
      "step": 1571
    },
    {
      "clip_ratio/high_max": 0.0017332785919279559,
      "clip_ratio/high_mean": 0.0004814774433725688,
      "clip_ratio/low_mean": 0.00019448248360731668,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006759599291399354,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3823.0,
      "completions/mean_length": 568.4498291015625,
      "completions/mean_terminated_length": 532.6572265625,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 14.690580344123651,
      "grad_norm": 0.11376953125,
      "learning_rate": 1e-06,
      "loss": -0.0134,
      "num_tokens": 915815841.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.1716756522655487,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 1572
    },
    {
      "clip_ratio/high_max": 0.001720041304906772,
      "clip_ratio/high_mean": 0.00047953514967957744,
      "clip_ratio/low_mean": 0.0004103460438500406,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000889881195689668,
      "completions/clipped_ratio": 0.0189732142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3873.0,
      "completions/mean_length": 698.4129638671875,
      "completions/mean_terminated_length": 632.7030639648438,
      "completions/min_length": 84.0,
      "completions/min_terminated_length": 84.0,
      "epoch": 14.699912510936134,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0,
      "num_tokens": 916463227.0,
      "reward": 0.4531250298023224,
      "reward_std": 0.22560739517211914,
      "rewards/verify_math_reward/mean": 0.453125,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 1573
    },
    {
      "clip_ratio/high_max": 0.0018501778213249054,
      "clip_ratio/high_mean": 0.0005637085314447177,
      "clip_ratio/low_mean": 0.0003098486677117762,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008735572077966935,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3247.0,
      "completions/mean_length": 580.5435791015625,
      "completions/mean_terminated_length": 532.8224487304688,
      "completions/min_length": 78.0,
      "completions/min_terminated_length": 78.0,
      "epoch": 14.709244677748615,
      "grad_norm": 0.1259765625,
      "learning_rate": 1e-06,
      "loss": 0.0093,
      "num_tokens": 917021978.0,
      "reward": 0.5814732313156128,
      "reward_std": 0.19512639939785004,
      "rewards/verify_math_reward/mean": 0.5814732313156128,
      "rewards/verify_math_reward/std": 0.4935929775238037,
      "step": 1574
    },
    {
      "clip_ratio/high_max": 0.0021636232231685426,
      "clip_ratio/high_mean": 0.0006965985867282143,
      "clip_ratio/low_mean": 0.0003783344146768286,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0010749330049293349,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3958.0,
      "completions/mean_length": 599.5178833007812,
      "completions/mean_terminated_length": 548.040771484375,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 14.718576844561097,
      "grad_norm": 0.138671875,
      "learning_rate": 1e-06,
      "loss": -0.0009,
      "num_tokens": 917587474.0,
      "reward": 0.5613839626312256,
      "reward_std": 0.24277111887931824,
      "rewards/verify_math_reward/mean": 0.5613839030265808,
      "rewards/verify_math_reward/std": 0.496494859457016,
      "step": 1575
    },
    {
      "clip_ratio/high_max": 0.0016982999331958126,
      "clip_ratio/high_mean": 0.0004861666807300935,
      "clip_ratio/low_mean": 0.00034483374474802986,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008310004268423654,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2598.0,
      "completions/mean_length": 595.630615234375,
      "completions/mean_terminated_length": 568.0686645507812,
      "completions/min_length": 91.0,
      "completions/min_terminated_length": 91.0,
      "epoch": 14.727909011373578,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0065,
      "num_tokens": 918199175.0,
      "reward": 0.5301339626312256,
      "reward_std": 0.21676772832870483,
      "rewards/verify_math_reward/mean": 0.5301339030265808,
      "rewards/verify_math_reward/std": 0.49936985969543457,
      "step": 1576
    },
    {
      "clip_ratio/high_max": 0.0014718273487233091,
      "clip_ratio/high_mean": 0.00039991727658161835,
      "clip_ratio/low_mean": 0.0002965557399647878,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006964730146137299,
      "completions/clipped_ratio": 0.012276785714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3368.0,
      "completions/mean_length": 588.2924194335938,
      "completions/mean_terminated_length": 544.6937866210938,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 14.73724117818606,
      "grad_norm": 0.11474609375,
      "learning_rate": 1e-06,
      "loss": 0.0169,
      "num_tokens": 918780557.0,
      "reward": 0.6037946939468384,
      "reward_std": 0.20165739953517914,
      "rewards/verify_math_reward/mean": 0.6037946343421936,
      "rewards/verify_math_reward/std": 0.48938122391700745,
      "step": 1577
    },
    {
      "clip_ratio/high_max": 0.0016819774118630448,
      "clip_ratio/high_mean": 0.0004913263937851298,
      "clip_ratio/low_mean": 0.00047859606638667174,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000969922464719275,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3364.0,
      "completions/mean_length": 574.7433471679688,
      "completions/mean_terminated_length": 539.0146484375,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 14.746573344998541,
      "grad_norm": 0.1494140625,
      "learning_rate": 1e-06,
      "loss": 0.0145,
      "num_tokens": 919339215.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.25359535217285156,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49786055088043213,
      "step": 1578
    },
    {
      "clip_ratio/high_max": 0.001475791541452054,
      "clip_ratio/high_mean": 0.00045782445386066684,
      "clip_ratio/low_mean": 0.0004203136682008335,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008781381284279632,
      "completions/clipped_ratio": 0.010044642857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3699.0,
      "completions/mean_length": 575.7545166015625,
      "completions/mean_terminated_length": 540.0360717773438,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 14.755905511811024,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": -0.007,
      "num_tokens": 919913315.0,
      "reward": 0.582589328289032,
      "reward_std": 0.2067016214132309,
      "rewards/verify_math_reward/mean": 0.5825892686843872,
      "rewards/verify_math_reward/std": 0.4934072494506836,
      "step": 1579
    },
    {
      "clip_ratio/high_max": 0.0016975590842776,
      "clip_ratio/high_mean": 0.0005550756031880155,
      "clip_ratio/low_mean": 0.00040159013474294625,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009566657299728831,
      "completions/clipped_ratio": 0.011160714285714302,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2218.0,
      "completions/mean_length": 569.7277221679688,
      "completions/mean_terminated_length": 529.9277954101562,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 14.765237678623505,
      "grad_norm": 0.142578125,
      "learning_rate": 1e-06,
      "loss": -0.006,
      "num_tokens": 920473623.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.23367930948734283,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.48961687088012695,
      "step": 1580
    },
    {
      "clip_ratio/high_max": 0.0016341464697688934,
      "clip_ratio/high_mean": 0.0004837096932988061,
      "clip_ratio/low_mean": 0.0003861727238927415,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008698824212842737,
      "completions/clipped_ratio": 0.030133928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3783.0,
      "completions/mean_length": 707.7689819335938,
      "completions/mean_terminated_length": 602.4959716796875,
      "completions/min_length": 126.0,
      "completions/min_terminated_length": 126.0,
      "epoch": 14.774569845435988,
      "grad_norm": 0.12060546875,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 921098728.0,
      "reward": 0.4542410969734192,
      "reward_std": 0.21353641152381897,
      "rewards/verify_math_reward/mean": 0.4542410671710968,
      "rewards/verify_math_reward/std": 0.4981798231601715,
      "step": 1581
    },
    {
      "clip_ratio/high_max": 0.001602337195436121,
      "clip_ratio/high_mean": 0.0004422149468155112,
      "clip_ratio/low_mean": 0.0003250001595915819,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007672151150472928,
      "completions/clipped_ratio": 0.006696428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2832.0,
      "completions/mean_length": 585.0201416015625,
      "completions/mean_terminated_length": 561.3505859375,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 14.783902012248468,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": -0.0034,
      "num_tokens": 921690698.0,
      "reward": 0.5524553656578064,
      "reward_std": 0.173563152551651,
      "rewards/verify_math_reward/mean": 0.5524553656578064,
      "rewards/verify_math_reward/std": 0.49751853942871094,
      "step": 1582
    },
    {
      "clip_ratio/high_max": 0.001917310130011174,
      "clip_ratio/high_mean": 0.0005945376769886934,
      "clip_ratio/low_mean": 0.0003221652618776716,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009167029320451547,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3482.0,
      "completions/mean_length": 585.6361694335938,
      "completions/mean_terminated_length": 525.8683471679688,
      "completions/min_length": 11.0,
      "completions/min_terminated_length": 11.0,
      "epoch": 14.793234179060951,
      "grad_norm": 0.1376953125,
      "learning_rate": 1e-06,
      "loss": -0.0039,
      "num_tokens": 922238828.0,
      "reward": 0.598214328289032,
      "reward_std": 0.20861980319023132,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49053290486335754,
      "step": 1583
    },
    {
      "clip_ratio/high_max": 0.0015323374082072405,
      "clip_ratio/high_mean": 0.00041175250066771696,
      "clip_ratio/low_mean": 0.0003433801232404221,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007551326198154129,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2749.0,
      "completions/mean_length": 606.1138916015625,
      "completions/mean_terminated_length": 546.6947021484375,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 14.802566345873432,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": -0.0005,
      "num_tokens": 922806450.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.18799060583114624,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 1584
    },
    {
      "clip_ratio/high_max": 0.0016291528081637807,
      "clip_ratio/high_mean": 0.0005152303215254506,
      "clip_ratio/low_mean": 0.00036970703752103873,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008849373716657283,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3813.0,
      "completions/mean_length": 666.3772583007812,
      "completions/mean_terminated_length": 604.0204467773438,
      "completions/min_length": 90.0,
      "completions/min_terminated_length": 90.0,
      "epoch": 14.811898512685914,
      "grad_norm": 0.130859375,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 923433980.0,
      "reward": 0.5368303656578064,
      "reward_std": 0.22361180186271667,
      "rewards/verify_math_reward/mean": 0.5368303656578064,
      "rewards/verify_math_reward/std": 0.49892017245292664,
      "step": 1585
    },
    {
      "clip_ratio/high_max": 0.0017048433710442623,
      "clip_ratio/high_mean": 0.00052554421813511,
      "clip_ratio/low_mean": 0.0002599278892603252,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.000785472115239827,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3703.0,
      "completions/mean_length": 619.6484375,
      "completions/mean_terminated_length": 564.46826171875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 14.821230679498395,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.0162,
      "num_tokens": 924024497.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.21181045472621918,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.49448272585868835,
      "step": 1586
    },
    {
      "clip_ratio/high_max": 0.0014261988408179604,
      "clip_ratio/high_mean": 0.0004011380869997083,
      "clip_ratio/low_mean": 0.000325291187550647,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007264292767104052,
      "completions/clipped_ratio": 0.016741071428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3336.0,
      "completions/mean_length": 670.779052734375,
      "completions/mean_terminated_length": 612.4608764648438,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 14.830562846310878,
      "grad_norm": 0.111328125,
      "learning_rate": 1e-06,
      "loss": 0.002,
      "num_tokens": 924649251.0,
      "reward": 0.4854910969734192,
      "reward_std": 0.18457356095314026,
      "rewards/verify_math_reward/mean": 0.4854910671710968,
      "rewards/verify_math_reward/std": 0.5000686049461365,
      "step": 1587
    },
    {
      "clip_ratio/high_max": 0.0016468872136101709,
      "clip_ratio/high_mean": 0.0005263573989395809,
      "clip_ratio/low_mean": 0.0003818326463260746,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009081900470846449,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3947.0,
      "completions/mean_length": 569.9096069335938,
      "completions/mean_terminated_length": 522.0441284179688,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 14.83989501312336,
      "grad_norm": 0.1298828125,
      "learning_rate": 1e-06,
      "loss": 0.0079,
      "num_tokens": 925203282.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.20208247005939484,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48865827918052673,
      "step": 1588
    },
    {
      "clip_ratio/high_max": 0.0017774242696759757,
      "clip_ratio/high_mean": 0.000528467898220697,
      "clip_ratio/low_mean": 0.00023485599547257152,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007633238951711974,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3914.0,
      "completions/mean_length": 690.646240234375,
      "completions/mean_terminated_length": 596.9208374023438,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 14.849227179935841,
      "grad_norm": 0.125,
      "learning_rate": 1e-06,
      "loss": 0.0063,
      "num_tokens": 925812605.0,
      "reward": 0.5256696939468384,
      "reward_std": 0.18021291494369507,
      "rewards/verify_math_reward/mean": 0.5256696343421936,
      "rewards/verify_math_reward/std": 0.4996195137500763,
      "step": 1589
    },
    {
      "clip_ratio/high_max": 0.001768240523233544,
      "clip_ratio/high_mean": 0.0004815827736592837,
      "clip_ratio/low_mean": 0.0003349495824522819,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008165323512230316,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2205.0,
      "completions/mean_length": 627.7745971679688,
      "completions/mean_terminated_length": 580.694580078125,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 14.858559346748324,
      "grad_norm": 0.1181640625,
      "learning_rate": 1e-06,
      "loss": -0.004,
      "num_tokens": 926418723.0,
      "reward": 0.5234375,
      "reward_std": 0.19558288156986237,
      "rewards/verify_math_reward/mean": 0.5234375,
      "rewards/verify_math_reward/std": 0.49972933530807495,
      "step": 1590
    },
    {
      "clip_ratio/high_max": 0.0016090367535070982,
      "clip_ratio/high_mean": 0.0004916613288514782,
      "clip_ratio/low_mean": 0.00034506639576648013,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008367277287106845,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2581.0,
      "completions/mean_length": 606.8917846679688,
      "completions/mean_terminated_length": 555.523193359375,
      "completions/min_length": 26.0,
      "completions/min_terminated_length": 26.0,
      "epoch": 14.867891513560805,
      "grad_norm": 0.134765625,
      "learning_rate": 1e-06,
      "loss": 0.0171,
      "num_tokens": 926997466.0,
      "reward": 0.5691964626312256,
      "reward_std": 0.22180058062076569,
      "rewards/verify_math_reward/mean": 0.5691964030265808,
      "rewards/verify_math_reward/std": 0.4954652488231659,
      "step": 1591
    },
    {
      "clip_ratio/high_max": 0.001990718868910335,
      "clip_ratio/high_mean": 0.0005809984504594468,
      "clip_ratio/low_mean": 0.0003284147120439229,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009094131546589779,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3798.0,
      "completions/mean_length": 558.453125,
      "completions/mean_terminated_length": 510.4321594238281,
      "completions/min_length": 94.0,
      "completions/min_terminated_length": 94.0,
      "epoch": 14.877223680373287,
      "grad_norm": 0.150390625,
      "learning_rate": 1e-06,
      "loss": 0.0004,
      "num_tokens": 927536520.0,
      "reward": 0.6049107313156128,
      "reward_std": 0.23217158019542694,
      "rewards/verify_math_reward/mean": 0.6049107313156128,
      "rewards/verify_math_reward/std": 0.48914292454719543,
      "step": 1592
    },
    {
      "clip_ratio/high_max": 0.0012845343526350916,
      "clip_ratio/high_mean": 0.00038729559696548677,
      "clip_ratio/low_mean": 0.00037624197398145043,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007635375782228948,
      "completions/clipped_ratio": 0.0279017857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3890.0,
      "completions/mean_length": 677.6506958007812,
      "completions/mean_terminated_length": 579.5350341796875,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 14.886555847185768,
      "grad_norm": 0.1279296875,
      "learning_rate": 1e-06,
      "loss": 0.0034,
      "num_tokens": 928137527.0,
      "reward": 0.559151828289032,
      "reward_std": 0.19516101479530334,
      "rewards/verify_math_reward/mean": 0.5591517686843872,
      "rewards/verify_math_reward/std": 0.496766060590744,
      "step": 1593
    },
    {
      "clip_ratio/high_max": 0.0015147674203035422,
      "clip_ratio/high_mean": 0.00042596659727678343,
      "clip_ratio/low_mean": 0.00025001416520353814,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006759807629350689,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2427.0,
      "completions/mean_length": 580.3861694335938,
      "completions/mean_terminated_length": 532.6629028320312,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "epoch": 14.89588801399825,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": -0.0089,
      "num_tokens": 928700681.0,
      "reward": 0.5502232313156128,
      "reward_std": 0.1609041541814804,
      "rewards/verify_math_reward/mean": 0.5502232313156128,
      "rewards/verify_math_reward/std": 0.49774909019470215,
      "step": 1594
    },
    {
      "clip_ratio/high_max": 0.001434171803339268,
      "clip_ratio/high_mean": 0.0004297477705677011,
      "clip_ratio/low_mean": 0.0003625786466727732,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007923264347482473,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4096.0,
      "completions/mean_length": 626.703125,
      "completions/mean_terminated_length": 563.625,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 14.905220180810732,
      "grad_norm": 0.1240234375,
      "learning_rate": 1e-06,
      "loss": 0.009,
      "num_tokens": 929287303.0,
      "reward": 0.5792410969734192,
      "reward_std": 0.18359535932540894,
      "rewards/verify_math_reward/mean": 0.5792410969734192,
      "rewards/verify_math_reward/std": 0.49395665526390076,
      "step": 1595
    },
    {
      "clip_ratio/high_max": 0.0013942216082796222,
      "clip_ratio/high_mean": 0.0004281837921098486,
      "clip_ratio/low_mean": 0.000468025960117302,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0008962097595031082,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2784.0,
      "completions/mean_length": 619.0692138671875,
      "completions/mean_terminated_length": 571.87109375,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 14.914552347623214,
      "grad_norm": 0.12109375,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 929878533.0,
      "reward": 0.53125,
      "reward_std": 0.20940369367599487,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.4993011951446533,
      "step": 1596
    },
    {
      "clip_ratio/high_max": 0.0016481798802487901,
      "clip_ratio/high_mean": 0.00043252003808902373,
      "clip_ratio/low_mean": 0.00028706260150102025,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007195826401584782,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2290.0,
      "completions/mean_length": 605.9085083007812,
      "completions/mean_terminated_length": 526.2260131835938,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 14.923884514435695,
      "grad_norm": 0.1220703125,
      "learning_rate": 1e-06,
      "loss": -0.0044,
      "num_tokens": 930421187.0,
      "reward": 0.6104910969734192,
      "reward_std": 0.1880647838115692,
      "rewards/verify_math_reward/mean": 0.6104910969734192,
      "rewards/verify_math_reward/std": 0.48791125416755676,
      "step": 1597
    },
    {
      "clip_ratio/high_max": 0.001700075298685988,
      "clip_ratio/high_mean": 0.00046269676079191413,
      "clip_ratio/low_mean": 0.00032024430538513116,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0007829410697013373,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3756.0,
      "completions/mean_length": 653.2199096679688,
      "completions/mean_terminated_length": 570.5931396484375,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 14.933216681248178,
      "grad_norm": 0.126953125,
      "learning_rate": 1e-06,
      "loss": 0.0014,
      "num_tokens": 931004440.0,
      "reward": 0.546875,
      "reward_std": 0.20069099962711334,
      "rewards/verify_math_reward/mean": 0.546875,
      "rewards/verify_math_reward/std": 0.4980759024620056,
      "step": 1598
    },
    {
      "clip_ratio/high_max": 0.001689458915279829,
      "clip_ratio/high_mean": 0.0005262118459086196,
      "clip_ratio/low_mean": 0.000389939800470529,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0009161516509266221,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4025.0,
      "completions/mean_length": 629.8638916015625,
      "completions/mean_terminated_length": 566.8431396484375,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 14.942548848060659,
      "grad_norm": 0.13671875,
      "learning_rate": 1e-06,
      "loss": 0.0067,
      "num_tokens": 931595734.0,
      "reward": 0.5680803656578064,
      "reward_std": 0.22202850878238678,
      "rewards/verify_math_reward/mean": 0.5680803656578064,
      "rewards/verify_math_reward/std": 0.4956200420856476,
      "step": 1599
    },
    {
      "clip_ratio/high_max": 0.00129362222742202,
      "clip_ratio/high_mean": 0.0003828839010111551,
      "clip_ratio/low_mean": 0.0002956844238042322,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0006785683294765477,
      "completions/clipped_ratio": 0.014508928571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3837.0,
      "completions/mean_length": 661.578125,
      "completions/mean_terminated_length": 611.0147094726562,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 14.951881014873141,
      "grad_norm": 0.10693359375,
      "learning_rate": 1e-06,
      "loss": 0.008,
      "num_tokens": 932235660.0,
      "reward": 0.4754464626312256,
      "reward_std": 0.18861931562423706,
      "rewards/verify_math_reward/mean": 0.4754464328289032,
      "rewards/verify_math_reward/std": 0.4996756315231323,
      "step": 1600
    },
    {
      "epoch": 14.951881014873141,
      "step": 1600,
      "total_flos": 0.0,
      "train_loss": 0.002559207767341967,
      "train_runtime": 118820.1312,
      "train_samples_per_second": 12.065,
      "train_steps_per_second": 0.013
    }
  ],
  "logging_steps": 1,
  "max_steps": 1600,
  "num_input_tokens_seen": 932235660,
  "num_train_epochs": 15,
  "save_steps": 80,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}