{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.14942629310093392, "eval_steps": 500, "global_step": 1601, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3983.0, "completions/mean_length": 586.052490234375, "completions/mean_terminated_length": 554.4313354492188, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 9.333310000058333e-05, "grad_norm": 0.14894212782382965, "learning_rate": 2e-07, "loss": 0.0116, "num_tokens": 608015.0, "reward": 0.5267857313156128, "reward_std": 0.26569417119026184, "rewards/simpleverify_reward/mean": 0.5267857313156128, "rewards/simpleverify_reward/std": 0.4995608627796173, "step": 1 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3639.0, "completions/mean_length": 658.997802734375, "completions/mean_terminated_length": 572.4827880859375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.00018666620000116666, "grad_norm": 0.14274385571479797, "learning_rate": 2e-07, "loss": 0.0295, "num_tokens": 1290709.0, "reward": 0.4207589328289032, "reward_std": 0.2861265242099762, "rewards/simpleverify_reward/mean": 0.4207589328289032, "rewards/simpleverify_reward/std": 0.49395662546157837, "step": 2 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2024.0, "completions/mean_length": 620.2221069335938, "completions/mean_terminated_length": 548.9647216796875, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 0.00027999930000175, "grad_norm": 0.13471642136573792, "learning_rate": 2e-07, "loss": 0.0232, "num_tokens": 1938436.0, "reward": 0.478794664144516, "reward_std": 0.2667461335659027, "rewards/simpleverify_reward/mean": 0.4787946343421936, "rewards/simpleverify_reward/std": 0.49982914328575134, "step": 3 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3007.0, "completions/mean_length": 655.4152221679688, "completions/mean_terminated_length": 608.71044921875, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "epoch": 0.0003733324000023333, "grad_norm": 0.14450564980506897, "learning_rate": 2e-07, "loss": 0.0172, "num_tokens": 2615568.0, "reward": 0.4252232313156128, "reward_std": 0.28128981590270996, "rewards/simpleverify_reward/mean": 0.4252232015132904, "rewards/simpleverify_reward/std": 0.4946529269218445, "step": 4 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2475.0, "completions/mean_length": 586.6484375, "completions/mean_terminated_length": 555.0326538085938, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.0004666655000029167, "grad_norm": 0.1305343359708786, "learning_rate": 2e-07, "loss": 0.0043, "num_tokens": 3232661.0, "reward": 0.4933035969734192, "reward_std": 0.2470964640378952, "rewards/simpleverify_reward/mean": 0.4933035671710968, "rewards/simpleverify_reward/std": 0.5002344250679016, "step": 5 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2944.0, "completions/mean_length": 568.4285888671875, "completions/mean_terminated_length": 532.6358032226562, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 0.0005599986000035, "grad_norm": 0.14486382901668549, "learning_rate": 2e-07, "loss": 0.0042, "num_tokens": 3833605.0, "reward": 0.4441964626312256, "reward_std": 0.24461443722248077, "rewards/simpleverify_reward/mean": 0.4441964328289032, "rewards/simpleverify_reward/std": 0.49715369939804077, "step": 6 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3157.0, "completions/mean_length": 615.0658569335938, "completions/mean_terminated_length": 559.8129272460938, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.0006533317000040833, "grad_norm": 0.15216903388500214, "learning_rate": 2e-07, "loss": 0.0105, "num_tokens": 4477936.0, "reward": 0.504464328289032, "reward_std": 0.2523932456970215, "rewards/simpleverify_reward/mean": 0.5044642686843872, "rewards/simpleverify_reward/std": 0.5002593398094177, "step": 7 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3121.0, "completions/mean_length": 623.427490234375, "completions/mean_terminated_length": 564.3031005859375, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 0.0007466648000046666, "grad_norm": 0.13206979632377625, "learning_rate": 2e-07, "loss": 0.0313, "num_tokens": 5135447.0, "reward": 0.5089285969734192, "reward_std": 0.2542693614959717, "rewards/simpleverify_reward/mean": 0.5089285969734192, "rewards/simpleverify_reward/std": 0.5001994967460632, "step": 8 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3983.0, "completions/mean_length": 615.3605346679688, "completions/mean_terminated_length": 552.0761108398438, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 0.00083999790000525, "grad_norm": 0.1403147131204605, "learning_rate": 2e-07, "loss": 0.0427, "num_tokens": 5783058.0, "reward": 0.4754464626312256, "reward_std": 0.2520572245121002, "rewards/simpleverify_reward/mean": 0.4754464328289032, "rewards/simpleverify_reward/std": 0.4996756315231323, "step": 9 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3797.0, "completions/mean_length": 621.6038208007812, "completions/mean_terminated_length": 546.331787109375, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.0009333310000058334, "grad_norm": 0.1676136553287506, "learning_rate": 2e-07, "loss": 0.0065, "num_tokens": 6428103.0, "reward": 0.5033482313156128, "reward_std": 0.25975868105888367, "rewards/simpleverify_reward/mean": 0.5033482313156128, "rewards/simpleverify_reward/std": 0.5002680420875549, "step": 10 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3870.0, "completions/mean_length": 565.9386596679688, "completions/mean_terminated_length": 542.1404418945312, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.0010266641000064166, "grad_norm": 0.13548392057418823, "learning_rate": 2e-07, "loss": 0.0007, "num_tokens": 7025232.0, "reward": 0.5212053656578064, "reward_std": 0.24089109897613525, "rewards/simpleverify_reward/mean": 0.5212053656578064, "rewards/simpleverify_reward/std": 0.49982914328575134, "step": 11 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2947.0, "completions/mean_length": 548.4944458007812, "completions/mean_terminated_length": 516.534912109375, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.001119997200007, "grad_norm": 0.17502151429653168, "learning_rate": 2e-07, "loss": 0.0066, "num_tokens": 7600875.0, "reward": 0.5189732313156128, "reward_std": 0.27787166833877563, "rewards/simpleverify_reward/mean": 0.5189732313156128, "rewards/simpleverify_reward/std": 0.49991893768310547, "step": 12 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2989.0, "completions/mean_length": 608.794677734375, "completions/mean_terminated_length": 553.4421997070312, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.0012133303000075833, "grad_norm": 0.1518460363149643, "learning_rate": 2e-07, "loss": 0.0448, "num_tokens": 8236787.0, "reward": 0.520089328289032, "reward_std": 0.2743351459503174, "rewards/simpleverify_reward/mean": 0.5200892686843872, "rewards/simpleverify_reward/std": 0.4998753070831299, "step": 13 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2872.0, "completions/mean_length": 618.052490234375, "completions/mean_terminated_length": 586.7196044921875, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.0013066634000081666, "grad_norm": 0.14038799703121185, "learning_rate": 2e-07, "loss": 0.0059, "num_tokens": 8893874.0, "reward": 0.4877232313156128, "reward_std": 0.2815508544445038, "rewards/simpleverify_reward/mean": 0.4877232015132904, "rewards/simpleverify_reward/std": 0.500128448009491, "step": 14 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2987.0, "completions/mean_length": 610.0904541015625, "completions/mean_terminated_length": 554.7584838867188, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.00139999650000875, "grad_norm": 0.14030103385448456, "learning_rate": 2e-07, "loss": 0.0233, "num_tokens": 9540291.0, "reward": 0.4352678656578064, "reward_std": 0.2780998945236206, "rewards/simpleverify_reward/mean": 0.4352678656578064, "rewards/simpleverify_reward/std": 0.49606895446777344, "step": 15 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4064.0, "completions/mean_length": 616.5614013671875, "completions/mean_terminated_length": 553.298828125, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 0.0014933296000093333, "grad_norm": 0.15774741768836975, "learning_rate": 2e-07, "loss": 0.0418, "num_tokens": 10178530.0, "reward": 0.4832589626312256, "reward_std": 0.2830139994621277, "rewards/simpleverify_reward/mean": 0.4832589328289032, "rewards/simpleverify_reward/std": 0.4999987483024597, "step": 16 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3943.0, "completions/mean_length": 646.6607666015625, "completions/mean_terminated_length": 591.9093017578125, "completions/min_length": 65.0, "completions/min_terminated_length": 65.0, "epoch": 0.0015866627000099165, "grad_norm": 0.13299201428890228, "learning_rate": 2e-07, "loss": 0.0263, "num_tokens": 10847690.0, "reward": 0.4497767984867096, "reward_std": 0.25986742973327637, "rewards/simpleverify_reward/mean": 0.4497767984867096, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 17 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2761.0, "completions/mean_length": 655.7109375, "completions/mean_terminated_length": 569.1132202148438, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.0016799958000105, "grad_norm": 0.14098875224590302, "learning_rate": 2e-07, "loss": 0.0213, "num_tokens": 11524095.0, "reward": 0.486607164144516, "reward_std": 0.2650076746940613, "rewards/simpleverify_reward/mean": 0.4866071343421936, "rewards/simpleverify_reward/std": 0.500099778175354, "step": 18 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2864.0, "completions/mean_length": 612.40625, "completions/mean_terminated_length": 569.1073608398438, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.0017733289000110833, "grad_norm": 0.13727599382400513, "learning_rate": 2e-07, "loss": 0.0352, "num_tokens": 12164339.0, "reward": 0.4810267984867096, "reward_std": 0.2549094557762146, "rewards/simpleverify_reward/mean": 0.4810267984867096, "rewards/simpleverify_reward/std": 0.49991899728775024, "step": 19 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2649.0, "completions/mean_length": 645.1864013671875, "completions/mean_terminated_length": 594.3816528320312, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.0018666620000116667, "grad_norm": 0.12143158912658691, "learning_rate": 2e-07, "loss": 0.024, "num_tokens": 12836346.0, "reward": 0.5256696939468384, "reward_std": 0.21688787639141083, "rewards/simpleverify_reward/mean": 0.5256696343421936, "rewards/simpleverify_reward/std": 0.4996195435523987, "step": 20 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2353.0, "completions/mean_length": 539.4910888671875, "completions/mean_terminated_length": 503.40472412109375, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.00195999510001225, "grad_norm": 0.15041778981685638, "learning_rate": 2e-07, "loss": 0.0273, "num_tokens": 13398810.0, "reward": 0.535714328289032, "reward_std": 0.2705051302909851, "rewards/simpleverify_reward/mean": 0.5357142686843872, "rewards/simpleverify_reward/std": 0.4990014135837555, "step": 21 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3212.0, "completions/mean_length": 620.9576416015625, "completions/mean_terminated_length": 569.796142578125, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 0.002053328200012833, "grad_norm": 0.1316279023885727, "learning_rate": 2e-07, "loss": 0.0123, "num_tokens": 14041572.0, "reward": 0.4620535969734192, "reward_std": 0.26430293917655945, "rewards/simpleverify_reward/mean": 0.4620535671710968, "rewards/simpleverify_reward/std": 0.4988364577293396, "step": 22 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3619.0, "completions/mean_length": 576.8850708007812, "completions/mean_terminated_length": 549.1754760742188, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "epoch": 0.0021466613000134167, "grad_norm": 0.1340492069721222, "learning_rate": 2e-07, "loss": 0.0131, "num_tokens": 14644429.0, "reward": 0.5814732313156128, "reward_std": 0.23984119296073914, "rewards/simpleverify_reward/mean": 0.5814732313156128, "rewards/simpleverify_reward/std": 0.4935929775238037, "step": 23 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3742.0, "completions/mean_length": 644.375, "completions/mean_terminated_length": 573.61279296875, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.002239994400014, "grad_norm": 0.13783109188079834, "learning_rate": 2e-07, "loss": 0.0284, "num_tokens": 15306605.0, "reward": 0.4776785969734192, "reward_std": 0.25532063841819763, "rewards/simpleverify_reward/mean": 0.4776785671710968, "rewards/simpleverify_reward/std": 0.4997805058956146, "step": 24 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3820.0, "completions/mean_length": 585.9989013671875, "completions/mean_terminated_length": 562.3359375, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.002333327500014583, "grad_norm": 0.14209768176078796, "learning_rate": 2e-07, "loss": 0.0243, "num_tokens": 15933084.0, "reward": 0.5379464626312256, "reward_std": 0.27110356092453003, "rewards/simpleverify_reward/mean": 0.5379464030265808, "rewards/simpleverify_reward/std": 0.4988364577293396, "step": 25 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3740.0, "completions/mean_length": 588.0859375, "completions/mean_terminated_length": 548.4932250976562, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.0024266606000151666, "grad_norm": 0.13313233852386475, "learning_rate": 2e-07, "loss": 0.0111, "num_tokens": 16554057.0, "reward": 0.5133928656578064, "reward_std": 0.22766894102096558, "rewards/simpleverify_reward/mean": 0.5133928656578064, "rewards/simpleverify_reward/std": 0.500099778175354, "step": 26 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2204.0, "completions/mean_length": 643.734375, "completions/mean_terminated_length": 572.9590454101562, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.00251999370001575, "grad_norm": 0.1182326152920723, "learning_rate": 2e-07, "loss": 0.0143, "num_tokens": 17230043.0, "reward": 0.5, "reward_std": 0.2568169832229614, "rewards/simpleverify_reward/mean": 0.5, "rewards/simpleverify_reward/std": 0.5002792477607727, "step": 27 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3506.0, "completions/mean_length": 568.9498291015625, "completions/mean_terminated_length": 517.0226440429688, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.002613326800016333, "grad_norm": 0.14213819801807404, "learning_rate": 2e-07, "loss": 0.0101, "num_tokens": 17825358.0, "reward": 0.5334821939468384, "reward_std": 0.24859529733657837, "rewards/simpleverify_reward/mean": 0.5334821343421936, "rewards/simpleverify_reward/std": 0.49915632605552673, "step": 28 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 1936.0, "completions/mean_length": 577.2745971679688, "completions/mean_terminated_length": 553.5527954101562, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.0027066599000169166, "grad_norm": 0.123870849609375, "learning_rate": 2e-07, "loss": -0.0022, "num_tokens": 18433236.0, "reward": 0.5267857313156128, "reward_std": 0.2384939342737198, "rewards/simpleverify_reward/mean": 0.5267857313156128, "rewards/simpleverify_reward/std": 0.4995608627796173, "step": 29 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3712.0, "completions/mean_length": 595.0402221679688, "completions/mean_terminated_length": 555.5260009765625, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.0027999930000175, "grad_norm": 0.14834679663181305, "learning_rate": 2e-07, "loss": 0.0236, "num_tokens": 19062336.0, "reward": 0.5234375, "reward_std": 0.2557777762413025, "rewards/simpleverify_reward/mean": 0.5234375, "rewards/simpleverify_reward/std": 0.49972933530807495, "step": 30 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3578.0, "completions/mean_length": 601.232177734375, "completions/mean_terminated_length": 537.69091796875, "completions/min_length": 8.0, "completions/min_terminated_length": 8.0, "epoch": 0.002893326100018083, "grad_norm": 0.12755534052848816, "learning_rate": 2e-07, "loss": 0.0233, "num_tokens": 19683512.0, "reward": 0.5770089626312256, "reward_std": 0.23999205231666565, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099319934845, "step": 31 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3686.0, "completions/mean_length": 588.3326416015625, "completions/mean_terminated_length": 524.5568237304688, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 0.0029866592000186666, "grad_norm": 0.14600606262683868, "learning_rate": 2e-07, "loss": 0.0146, "num_tokens": 20293882.0, "reward": 0.574776828289032, "reward_std": 0.2404831200838089, "rewards/simpleverify_reward/mean": 0.5747767686843872, "rewards/simpleverify_reward/std": 0.49465295672416687, "step": 32 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3605.0, "completions/mean_length": 641.4765625, "completions/mean_terminated_length": 578.6670532226562, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 0.00307999230001925, "grad_norm": 0.11914670467376709, "learning_rate": 2e-07, "loss": 0.0193, "num_tokens": 20959029.0, "reward": 0.5167410969734192, "reward_std": 0.23942935466766357, "rewards/simpleverify_reward/mean": 0.5167410969734192, "rewards/simpleverify_reward/std": 0.4999987483024597, "step": 33 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3918.0, "completions/mean_length": 638.966552734375, "completions/mean_terminated_length": 568.0934448242188, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 0.003173325400019833, "grad_norm": 0.13645559549331665, "learning_rate": 2e-07, "loss": 0.0393, "num_tokens": 21620031.0, "reward": 0.5256696939468384, "reward_std": 0.23191265761852264, "rewards/simpleverify_reward/mean": 0.5256696343421936, "rewards/simpleverify_reward/std": 0.4996195435523987, "step": 34 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2922.0, "completions/mean_length": 619.9442138671875, "completions/mean_terminated_length": 580.7110595703125, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 0.0032666585000204165, "grad_norm": 0.12014550715684891, "learning_rate": 2e-07, "loss": 0.0115, "num_tokens": 22277173.0, "reward": 0.53125, "reward_std": 0.20200690627098083, "rewards/simpleverify_reward/mean": 0.53125, "rewards/simpleverify_reward/std": 0.4993011951446533, "step": 35 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3118.0, "completions/mean_length": 656.3225708007812, "completions/mean_terminated_length": 577.7910766601562, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.003359991600021, "grad_norm": 0.11663436889648438, "learning_rate": 2e-07, "loss": 0.0251, "num_tokens": 22956270.0, "reward": 0.5267857313156128, "reward_std": 0.23349636793136597, "rewards/simpleverify_reward/mean": 0.5267857313156128, "rewards/simpleverify_reward/std": 0.4995608627796173, "step": 36 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3939.0, "completions/mean_length": 653.9285888671875, "completions/mean_terminated_length": 571.31884765625, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "epoch": 0.0034533247000215835, "grad_norm": 0.13482102751731873, "learning_rate": 2e-07, "loss": 0.0253, "num_tokens": 23637326.0, "reward": 0.5290178656578064, "reward_std": 0.26452937722206116, "rewards/simpleverify_reward/mean": 0.5290178656578064, "rewards/simpleverify_reward/std": 0.49943605065345764, "step": 37 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3124.0, "completions/mean_length": 587.8861694335938, "completions/mean_terminated_length": 552.2908325195312, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.0035466578000221665, "grad_norm": 0.12904879450798035, "learning_rate": 2e-07, "loss": 0.0243, "num_tokens": 24244808.0, "reward": 0.5167410969734192, "reward_std": 0.24408058822155, "rewards/simpleverify_reward/mean": 0.5167410969734192, "rewards/simpleverify_reward/std": 0.4999987483024597, "step": 38 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3757.0, "completions/mean_length": 660.7578125, "completions/mean_terminated_length": 590.3314819335938, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 0.00363999090002275, "grad_norm": 0.1281396746635437, "learning_rate": 2e-07, "loss": 0.0071, "num_tokens": 24920775.0, "reward": 0.4877232313156128, "reward_std": 0.27918368577957153, "rewards/simpleverify_reward/mean": 0.4877232015132904, "rewards/simpleverify_reward/std": 0.500128448009491, "step": 39 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2946.0, "completions/mean_length": 607.716552734375, "completions/mean_terminated_length": 552.346923828125, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.0037333240000233334, "grad_norm": 0.11140039563179016, "learning_rate": 2e-07, "loss": 0.0065, "num_tokens": 25557801.0, "reward": 0.5256696939468384, "reward_std": 0.17810925841331482, "rewards/simpleverify_reward/mean": 0.5256696343421936, "rewards/simpleverify_reward/std": 0.4996195435523987, "step": 40 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2262.0, "completions/mean_length": 592.2444458007812, "completions/mean_terminated_length": 552.6986694335938, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.0038266571000239165, "grad_norm": 0.1285465955734253, "learning_rate": 2e-07, "loss": 0.0159, "num_tokens": 26180140.0, "reward": 0.4955357313156128, "reward_std": 0.2530326545238495, "rewards/simpleverify_reward/mean": 0.4955357015132904, "rewards/simpleverify_reward/std": 0.500259280204773, "step": 41 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3709.0, "completions/mean_length": 633.2767944335938, "completions/mean_terminated_length": 558.2576904296875, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.0039199902000245, "grad_norm": 0.12360970675945282, "learning_rate": 2e-07, "loss": 0.019, "num_tokens": 26837476.0, "reward": 0.543526828289032, "reward_std": 0.22661586105823517, "rewards/simpleverify_reward/mean": 0.5435267686843872, "rewards/simpleverify_reward/std": 0.49838000535964966, "step": 42 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2771.0, "completions/mean_length": 598.6373291015625, "completions/mean_terminated_length": 543.1235961914062, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.004013323300025083, "grad_norm": 0.1343419998884201, "learning_rate": 2e-07, "loss": 0.0179, "num_tokens": 27471775.0, "reward": 0.4988839626312256, "reward_std": 0.25855404138565063, "rewards/simpleverify_reward/mean": 0.4988839328289032, "rewards/simpleverify_reward/std": 0.5002779960632324, "step": 43 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3800.0, "completions/mean_length": 635.6060791015625, "completions/mean_terminated_length": 564.6640625, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 0.004106656400025666, "grad_norm": 0.13284070789813995, "learning_rate": 2e-07, "loss": 0.0329, "num_tokens": 28129150.0, "reward": 0.4988839626312256, "reward_std": 0.26704782247543335, "rewards/simpleverify_reward/mean": 0.4988839328289032, "rewards/simpleverify_reward/std": 0.5002779960632324, "step": 44 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2900.0, "completions/mean_length": 574.3381958007812, "completions/mean_terminated_length": 518.4387817382812, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.00419998950002625, "grad_norm": 0.1301700323820114, "learning_rate": 2e-07, "loss": 0.018, "num_tokens": 28731453.0, "reward": 0.5803571939468384, "reward_std": 0.21853689849376678, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761425971985, "step": 45 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3277.0, "completions/mean_length": 626.1451416015625, "completions/mean_terminated_length": 575.0599975585938, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.004293322600026833, "grad_norm": 0.13736340403556824, "learning_rate": 2e-07, "loss": 0.0224, "num_tokens": 29380487.0, "reward": 0.5390625, "reward_std": 0.2522405982017517, "rewards/simpleverify_reward/mean": 0.5390625, "rewards/simpleverify_reward/std": 0.4987502098083496, "step": 46 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3110.0, "completions/mean_length": 634.9631958007812, "completions/mean_terminated_length": 551.8982543945312, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "epoch": 0.004386655700027417, "grad_norm": 0.1331474632024765, "learning_rate": 2e-07, "loss": 0.047, "num_tokens": 30049582.0, "reward": 0.512276828289032, "reward_std": 0.23762626945972443, "rewards/simpleverify_reward/mean": 0.5122767686843872, "rewards/simpleverify_reward/std": 0.500128448009491, "step": 47 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2696.0, "completions/mean_length": 588.0636596679688, "completions/mean_terminated_length": 540.444580078125, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "epoch": 0.004479988800028, "grad_norm": 0.12872089445590973, "learning_rate": 2e-07, "loss": 0.0119, "num_tokens": 30663919.0, "reward": 0.582589328289032, "reward_std": 0.2441604882478714, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.4934072494506836, "step": 48 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1978.0, "completions/mean_length": 540.4252319335938, "completions/mean_terminated_length": 508.3930358886719, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.004573321900028583, "grad_norm": 0.13207238912582397, "learning_rate": 2e-07, "loss": 0.016, "num_tokens": 31239036.0, "reward": 0.551339328289032, "reward_std": 0.21766996383666992, "rewards/simpleverify_reward/mean": 0.5513392686843872, "rewards/simpleverify_reward/std": 0.4976350665092468, "step": 49 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4007.0, "completions/mean_length": 645.6886596679688, "completions/mean_terminated_length": 570.9384155273438, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.004666655000029166, "grad_norm": 0.1187574490904808, "learning_rate": 2e-07, "loss": 0.0141, "num_tokens": 31912597.0, "reward": 0.5424107313156128, "reward_std": 0.20996643602848053, "rewards/simpleverify_reward/mean": 0.5424107313156128, "rewards/simpleverify_reward/std": 0.4984763562679291, "step": 50 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3659.0, "completions/mean_length": 632.2210083007812, "completions/mean_terminated_length": 569.2431640625, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.00475998810002975, "grad_norm": 0.12449927628040314, "learning_rate": 2e-07, "loss": 0.0253, "num_tokens": 32579091.0, "reward": 0.515625, "reward_std": 0.24243271350860596, "rewards/simpleverify_reward/mean": 0.515625, "rewards/simpleverify_reward/std": 0.5000349283218384, "step": 51 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3448.0, "completions/mean_length": 586.0357666015625, "completions/mean_terminated_length": 542.4090576171875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 0.004853321200030333, "grad_norm": 0.11511293798685074, "learning_rate": 2e-07, "loss": 0.009, "num_tokens": 33181227.0, "reward": 0.582589328289032, "reward_std": 0.18858975172042847, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.4934072494506836, "step": 52 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3812.0, "completions/mean_length": 615.8058471679688, "completions/mean_terminated_length": 564.5684814453125, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.004946654300030917, "grad_norm": 0.1416894644498825, "learning_rate": 2e-07, "loss": 0.0253, "num_tokens": 33820477.0, "reward": 0.5870535969734192, "reward_std": 0.24885743856430054, "rewards/simpleverify_reward/mean": 0.5870535969734192, "rewards/simpleverify_reward/std": 0.49263834953308105, "step": 53 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 631.5971069335938, "completions/mean_terminated_length": 556.5416259765625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.0050399874000315, "grad_norm": 0.1136423721909523, "learning_rate": 2e-07, "loss": 0.0293, "num_tokens": 34470692.0, "reward": 0.5078125, "reward_std": 0.21226368844509125, "rewards/simpleverify_reward/mean": 0.5078125, "rewards/simpleverify_reward/std": 0.5002182126045227, "step": 54 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3230.0, "completions/mean_length": 582.9498291015625, "completions/mean_terminated_length": 543.2991333007812, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.005133320500032083, "grad_norm": 0.12786602973937988, "learning_rate": 2e-07, "loss": 0.0232, "num_tokens": 35078455.0, "reward": 0.5613839626312256, "reward_std": 0.20606406033039093, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 55 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2699.0, "completions/mean_length": 583.7076416015625, "completions/mean_terminated_length": 536.0294189453125, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.005226653600032666, "grad_norm": 0.11711642146110535, "learning_rate": 2e-07, "loss": 0.0282, "num_tokens": 35684113.0, "reward": 0.5848214626312256, "reward_std": 0.19463853538036346, "rewards/simpleverify_reward/mean": 0.5848214030265808, "rewards/simpleverify_reward/std": 0.49302801489830017, "step": 56 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3977.0, "completions/mean_length": 579.005615234375, "completions/mean_terminated_length": 523.1802978515625, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.00531998670003325, "grad_norm": 0.12185695767402649, "learning_rate": 2e-07, "loss": 0.0218, "num_tokens": 36294566.0, "reward": 0.582589328289032, "reward_std": 0.21511872112751007, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.4934072494506836, "step": 57 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3061.0, "completions/mean_length": 589.935302734375, "completions/mean_terminated_length": 538.3170776367188, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "epoch": 0.005413319800033833, "grad_norm": 0.12470349669456482, "learning_rate": 2e-07, "loss": 0.0406, "num_tokens": 36908900.0, "reward": 0.6149553656578064, "reward_std": 0.23375527560710907, "rewards/simpleverify_reward/mean": 0.6149553656578064, "rewards/simpleverify_reward/std": 0.4868776500225067, "step": 58 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2082.0, "completions/mean_length": 589.8114013671875, "completions/mean_terminated_length": 554.235595703125, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.005506652900034417, "grad_norm": 0.11984679847955704, "learning_rate": 2e-07, "loss": 0.0134, "num_tokens": 37525011.0, "reward": 0.527901828289032, "reward_std": 0.21809138357639313, "rewards/simpleverify_reward/mean": 0.5279017686843872, "rewards/simpleverify_reward/std": 0.49949970841407776, "step": 59 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3687.0, "completions/mean_length": 660.3092041015625, "completions/mean_terminated_length": 585.8756713867188, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.005599986000035, "grad_norm": 0.1187063604593277, "learning_rate": 2e-07, "loss": 0.0316, "num_tokens": 38203368.0, "reward": 0.5167410969734192, "reward_std": 0.2183125764131546, "rewards/simpleverify_reward/mean": 0.5167410969734192, "rewards/simpleverify_reward/std": 0.4999987483024597, "step": 60 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 1755.0, "completions/mean_length": 512.4553833007812, "completions/mean_terminated_length": 488.296630859375, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.005693319100035584, "grad_norm": 0.13587848842144012, "learning_rate": 2e-07, "loss": 0.0224, "num_tokens": 38747408.0, "reward": 0.6595982313156128, "reward_std": 0.21786358952522278, "rewards/simpleverify_reward/mean": 0.6595982313156128, "rewards/simpleverify_reward/std": 0.4741089344024658, "step": 61 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2593.0, "completions/mean_length": 605.5022583007812, "completions/mean_terminated_length": 554.1132202148438, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.005786652200036166, "grad_norm": 0.12373954057693481, "learning_rate": 2e-07, "loss": 0.0146, "num_tokens": 39379402.0, "reward": 0.5558035969734192, "reward_std": 0.23766018450260162, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715372920036316, "step": 62 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 579.8270263671875, "completions/mean_terminated_length": 540.14111328125, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.00587998530003675, "grad_norm": 0.11738574504852295, "learning_rate": 2e-07, "loss": 0.0086, "num_tokens": 39986319.0, "reward": 0.5491071939468384, "reward_std": 0.20674508810043335, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 63 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4059.0, "completions/mean_length": 588.927490234375, "completions/mean_terminated_length": 569.2469482421875, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 0.005973318400037333, "grad_norm": 0.11654112488031387, "learning_rate": 2e-07, "loss": 0.0201, "num_tokens": 40609798.0, "reward": 0.5178571939468384, "reward_std": 0.2160523384809494, "rewards/simpleverify_reward/mean": 0.5178571343421936, "rewards/simpleverify_reward/std": 0.4999600946903229, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3188.0, "completions/mean_length": 553.591552734375, "completions/mean_terminated_length": 521.6779174804688, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.006066651500037917, "grad_norm": 0.14344535768032074, "learning_rate": 2e-07, "loss": 0.005, "num_tokens": 41188760.0, "reward": 0.5714285969734192, "reward_std": 0.2578040659427643, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514803290367126, "step": 65 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3734.0, "completions/mean_length": 605.6886596679688, "completions/mean_terminated_length": 558.308837890625, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.0061599846000385, "grad_norm": 0.11568710207939148, "learning_rate": 2e-07, "loss": 0.017, "num_tokens": 41818729.0, "reward": 0.5725446939468384, "reward_std": 0.21188785135746002, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 66 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2856.0, "completions/mean_length": 536.8426513671875, "completions/mean_terminated_length": 508.8177795410156, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 0.0062533177000390835, "grad_norm": 0.13949453830718994, "learning_rate": 2e-07, "loss": 0.021, "num_tokens": 42383964.0, "reward": 0.551339328289032, "reward_std": 0.22406010329723358, "rewards/simpleverify_reward/mean": 0.5513392686843872, "rewards/simpleverify_reward/std": 0.4976350665092468, "step": 67 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3950.0, "completions/mean_length": 598.2533569335938, "completions/mean_terminated_length": 542.7335815429688, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.006346650800039666, "grad_norm": 0.1248648464679718, "learning_rate": 2e-07, "loss": 0.0332, "num_tokens": 43006975.0, "reward": 0.5926339626312256, "reward_std": 0.19576720893383026, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 68 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2207.0, "completions/mean_length": 618.6038208007812, "completions/mean_terminated_length": 575.3819580078125, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 0.00643998390004025, "grad_norm": 0.11936494708061218, "learning_rate": 2e-07, "loss": 0.0036, "num_tokens": 43653484.0, "reward": 0.5491071939468384, "reward_std": 0.20133177936077118, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 69 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2267.0, "completions/mean_length": 581.6417846679688, "completions/mean_terminated_length": 549.9808959960938, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.006533317000040833, "grad_norm": 0.12859514355659485, "learning_rate": 2e-07, "loss": 0.0147, "num_tokens": 44261931.0, "reward": 0.53125, "reward_std": 0.23477405309677124, "rewards/simpleverify_reward/mean": 0.53125, "rewards/simpleverify_reward/std": 0.4993011951446533, "step": 70 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2581.0, "completions/mean_length": 593.6495971679688, "completions/mean_terminated_length": 554.11962890625, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.0066266501000414165, "grad_norm": 0.12527605891227722, "learning_rate": 2e-07, "loss": 0.0188, "num_tokens": 44884697.0, "reward": 0.5345982313156128, "reward_std": 0.20711280405521393, "rewards/simpleverify_reward/mean": 0.5345982313156128, "rewards/simpleverify_reward/std": 0.4990801215171814, "step": 71 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3007.0, "completions/mean_length": 662.9386596679688, "completions/mean_terminated_length": 548.1072387695312, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.006719983200042, "grad_norm": 0.11334379762411118, "learning_rate": 2e-07, "loss": 0.0237, "num_tokens": 45569290.0, "reward": 0.527901828289032, "reward_std": 0.1874699890613556, "rewards/simpleverify_reward/mean": 0.5279017686843872, "rewards/simpleverify_reward/std": 0.49949970841407776, "step": 72 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3463.0, "completions/mean_length": 607.5971069335938, "completions/mean_terminated_length": 564.2384033203125, "completions/min_length": 70.0, "completions/min_terminated_length": 70.0, "epoch": 0.0068133163000425835, "grad_norm": 0.13698521256446838, "learning_rate": 2e-07, "loss": 0.0483, "num_tokens": 46195273.0, "reward": 0.5323660969734192, "reward_std": 0.2486726939678192, "rewards/simpleverify_reward/mean": 0.5323660969734192, "rewards/simpleverify_reward/std": 0.4992299973964691, "step": 73 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3263.0, "completions/mean_length": 630.6239013671875, "completions/mean_terminated_length": 587.5514526367188, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.006906649400043167, "grad_norm": 0.1124383807182312, "learning_rate": 2e-07, "loss": 0.0414, "num_tokens": 46855168.0, "reward": 0.5178571939468384, "reward_std": 0.21763646602630615, "rewards/simpleverify_reward/mean": 0.5178571343421936, "rewards/simpleverify_reward/std": 0.4999600946903229, "step": 74 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2309.0, "completions/mean_length": 610.4777221679688, "completions/mean_terminated_length": 563.1629028320312, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.0069999825000437495, "grad_norm": 0.12460388988256454, "learning_rate": 2e-07, "loss": -0.0012, "num_tokens": 47488636.0, "reward": 0.5167410969734192, "reward_std": 0.24645917117595673, "rewards/simpleverify_reward/mean": 0.5167410969734192, "rewards/simpleverify_reward/std": 0.4999987483024597, "step": 75 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3860.0, "completions/mean_length": 626.036865234375, "completions/mean_terminated_length": 578.9332885742188, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.007093315600044333, "grad_norm": 0.12412525713443756, "learning_rate": 2e-07, "loss": 0.0087, "num_tokens": 48131125.0, "reward": 0.5290178656578064, "reward_std": 0.23142340779304504, "rewards/simpleverify_reward/mean": 0.5290178656578064, "rewards/simpleverify_reward/std": 0.49943602085113525, "step": 76 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2190.0, "completions/mean_length": 602.3739013671875, "completions/mean_terminated_length": 554.9490966796875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.0071866487000449165, "grad_norm": 0.11633453518152237, "learning_rate": 2e-07, "loss": 0.0225, "num_tokens": 48764292.0, "reward": 0.520089328289032, "reward_std": 0.1903877854347229, "rewards/simpleverify_reward/mean": 0.5200892686843872, "rewards/simpleverify_reward/std": 0.4998753070831299, "step": 77 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4042.0, "completions/mean_length": 597.4386596679688, "completions/mean_terminated_length": 529.77587890625, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.0072799818000455, "grad_norm": 0.1261005401611328, "learning_rate": 2e-07, "loss": 0.0382, "num_tokens": 49378837.0, "reward": 0.582589328289032, "reward_std": 0.23431868851184845, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.493407279253006, "step": 78 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3986.0, "completions/mean_length": 582.7835083007812, "completions/mean_terminated_length": 559.098876953125, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 0.007373314900046083, "grad_norm": 0.11349702626466751, "learning_rate": 2e-07, "loss": 0.02, "num_tokens": 49980651.0, "reward": 0.582589328289032, "reward_std": 0.2097385972738266, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.493407279253006, "step": 79 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2158.0, "completions/mean_length": 566.4252319335938, "completions/mean_terminated_length": 522.5548095703125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.007466648000046667, "grad_norm": 0.12239402532577515, "learning_rate": 2e-07, "loss": 0.0327, "num_tokens": 50581944.0, "reward": 0.5725446939468384, "reward_std": 0.20583511888980865, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 80 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2656.0, "completions/mean_length": 593.796875, "completions/mean_terminated_length": 530.1204223632812, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.0075599811000472495, "grad_norm": 0.13750427961349487, "learning_rate": 2e-07, "loss": 0.0334, "num_tokens": 51200266.0, "reward": 0.5892857313156128, "reward_std": 0.24581767618656158, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 81 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3843.0, "completions/mean_length": 676.8147583007812, "completions/mean_terminated_length": 590.7482299804688, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.007653314200047833, "grad_norm": 0.11970255523920059, "learning_rate": 2e-07, "loss": 0.0294, "num_tokens": 51889708.0, "reward": 0.5446428656578064, "reward_std": 0.23897498846054077, "rewards/simpleverify_reward/mean": 0.5446428656578064, "rewards/simpleverify_reward/std": 0.4982811510562897, "step": 82 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2570.0, "completions/mean_length": 607.2176513671875, "completions/mean_terminated_length": 567.8408813476562, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.007746647300048416, "grad_norm": 0.13053211569786072, "learning_rate": 2e-07, "loss": 0.0224, "num_tokens": 52518183.0, "reward": 0.53125, "reward_std": 0.24071136116981506, "rewards/simpleverify_reward/mean": 0.53125, "rewards/simpleverify_reward/std": 0.4993011951446533, "step": 83 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2837.0, "completions/mean_length": 643.0513916015625, "completions/mean_terminated_length": 548.0160522460938, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.007839980400049, "grad_norm": 0.12160377204418182, "learning_rate": 2e-07, "loss": 0.0227, "num_tokens": 53183909.0, "reward": 0.6127232313156128, "reward_std": 0.20246043801307678, "rewards/simpleverify_reward/mean": 0.6127232313156128, "rewards/simpleverify_reward/std": 0.4873998463153839, "step": 84 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2146.0, "completions/mean_length": 630.8873291015625, "completions/mean_terminated_length": 579.8720092773438, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.007933313500049582, "grad_norm": 0.11715319007635117, "learning_rate": 2e-07, "loss": 0.0307, "num_tokens": 53835048.0, "reward": 0.5970982313156128, "reward_std": 0.21383923292160034, "rewards/simpleverify_reward/mean": 0.5970982313156128, "rewards/simpleverify_reward/std": 0.49075525999069214, "step": 85 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2659.0, "completions/mean_length": 589.2142944335938, "completions/mean_terminated_length": 549.6343383789062, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.008026646600050166, "grad_norm": 0.12226558476686478, "learning_rate": 2e-07, "loss": 0.0239, "num_tokens": 54448784.0, "reward": 0.520089328289032, "reward_std": 0.2246662974357605, "rewards/simpleverify_reward/mean": 0.5200892686843872, "rewards/simpleverify_reward/std": 0.4998753070831299, "step": 86 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3485.0, "completions/mean_length": 546.7567138671875, "completions/mean_terminated_length": 514.7815551757812, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.00811997970005075, "grad_norm": 0.12142011523246765, "learning_rate": 2e-07, "loss": 0.0231, "num_tokens": 55024494.0, "reward": 0.5691964626312256, "reward_std": 0.20027051866054535, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652488231659, "step": 87 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3454.0, "completions/mean_length": 627.6171875, "completions/mean_terminated_length": 576.5537719726562, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.008213312800051333, "grad_norm": 0.10394436120986938, "learning_rate": 2e-07, "loss": 0.0197, "num_tokens": 55681839.0, "reward": 0.5089285969734192, "reward_std": 0.1707427203655243, "rewards/simpleverify_reward/mean": 0.5089285969734192, "rewards/simpleverify_reward/std": 0.5001994967460632, "step": 88 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2484.0, "completions/mean_length": 600.015625, "completions/mean_terminated_length": 572.4882202148438, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.008306645900051916, "grad_norm": 0.12543246150016785, "learning_rate": 2e-07, "loss": 0.0336, "num_tokens": 56303749.0, "reward": 0.5658482313156128, "reward_std": 0.2359677106142044, "rewards/simpleverify_reward/mean": 0.5658482313156128, "rewards/simpleverify_reward/std": 0.49592188000679016, "step": 89 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3921.0, "completions/mean_length": 579.640625, "completions/mean_terminated_length": 547.9617309570312, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.0083999790000525, "grad_norm": 0.12909793853759766, "learning_rate": 2e-07, "loss": 0.0288, "num_tokens": 56910843.0, "reward": 0.5546875, "reward_std": 0.25265106558799744, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 90 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3874.0, "completions/mean_length": 558.6674194335938, "completions/mean_terminated_length": 530.8143920898438, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.008493312100053083, "grad_norm": 0.10572243481874466, "learning_rate": 2e-07, "loss": 0.0101, "num_tokens": 57497841.0, "reward": 0.5256696939468384, "reward_std": 0.17937780916690826, "rewards/simpleverify_reward/mean": 0.5256696343421936, "rewards/simpleverify_reward/std": 0.4996195137500763, "step": 91 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2912.0, "completions/mean_length": 605.700927734375, "completions/mean_terminated_length": 554.3148193359375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.008586645200053667, "grad_norm": 0.1250797063112259, "learning_rate": 2e-07, "loss": 0.0444, "num_tokens": 58133557.0, "reward": 0.5479910969734192, "reward_std": 0.19889113306999207, "rewards/simpleverify_reward/mean": 0.5479910969734192, "rewards/simpleverify_reward/std": 0.49796950817108154, "step": 92 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3638.0, "completions/mean_length": 632.8326416015625, "completions/mean_terminated_length": 593.7449340820312, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.00867997830005425, "grad_norm": 0.108908511698246, "learning_rate": 2e-07, "loss": 0.0145, "num_tokens": 58786047.0, "reward": 0.5680803656578064, "reward_std": 0.1874253898859024, "rewards/simpleverify_reward/mean": 0.5680803656578064, "rewards/simpleverify_reward/std": 0.4956200420856476, "step": 93 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3861.0, "completions/mean_length": 668.71875, "completions/mean_terminated_length": 606.404541015625, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "epoch": 0.008773311400054834, "grad_norm": 0.10387402027845383, "learning_rate": 2e-07, "loss": 0.018, "num_tokens": 59475099.0, "reward": 0.5290178656578064, "reward_std": 0.20527057349681854, "rewards/simpleverify_reward/mean": 0.5290178656578064, "rewards/simpleverify_reward/std": 0.49943605065345764, "step": 94 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3956.0, "completions/mean_length": 686.1317138671875, "completions/mean_terminated_length": 612.2576904296875, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 0.008866644500055417, "grad_norm": 0.11647990345954895, "learning_rate": 2e-07, "loss": 0.0401, "num_tokens": 60186689.0, "reward": 0.5234375, "reward_std": 0.24487335979938507, "rewards/simpleverify_reward/mean": 0.5234375, "rewards/simpleverify_reward/std": 0.49972933530807495, "step": 95 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2863.0, "completions/mean_length": 635.6652221679688, "completions/mean_terminated_length": 576.7492065429688, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 0.008959977600056, "grad_norm": 0.13032424449920654, "learning_rate": 2e-07, "loss": 0.0006, "num_tokens": 60845493.0, "reward": 0.527901828289032, "reward_std": 0.2484130561351776, "rewards/simpleverify_reward/mean": 0.5279017686843872, "rewards/simpleverify_reward/std": 0.49949970841407776, "step": 96 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3588.0, "completions/mean_length": 575.3795166015625, "completions/mean_terminated_length": 519.49658203125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.009053310700056582, "grad_norm": 0.12801982462406158, "learning_rate": 2e-07, "loss": 0.0341, "num_tokens": 61446073.0, "reward": 0.5803571939468384, "reward_std": 0.21812643110752106, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761127948761, "step": 97 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2505.0, "completions/mean_length": 553.9832763671875, "completions/mean_terminated_length": 526.0933837890625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.009146643800057166, "grad_norm": 0.11647334694862366, "learning_rate": 2e-07, "loss": 0.0069, "num_tokens": 62022570.0, "reward": 0.6049107313156128, "reward_std": 0.18153446912765503, "rewards/simpleverify_reward/mean": 0.6049107313156128, "rewards/simpleverify_reward/std": 0.48914292454719543, "step": 98 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2638.0, "completions/mean_length": 636.732177734375, "completions/mean_terminated_length": 573.8363647460938, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.00923997690005775, "grad_norm": 0.1137307733297348, "learning_rate": 2e-07, "loss": 0.0062, "num_tokens": 62684418.0, "reward": 0.5569196939468384, "reward_std": 0.2054641991853714, "rewards/simpleverify_reward/mean": 0.5569196343421936, "rewards/simpleverify_reward/std": 0.4970270097255707, "step": 99 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3625.0, "completions/mean_length": 651.0435791015625, "completions/mean_terminated_length": 588.407958984375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.009333310000058333, "grad_norm": 0.112153060734272, "learning_rate": 2e-07, "loss": 0.0333, "num_tokens": 63362841.0, "reward": 0.4810267984867096, "reward_std": 0.21902361512184143, "rewards/simpleverify_reward/mean": 0.4810267984867096, "rewards/simpleverify_reward/std": 0.49991899728775024, "step": 100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2653.0, "completions/mean_length": 597.9152221679688, "completions/mean_terminated_length": 518.0502319335938, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.009426643100058916, "grad_norm": 0.13159026205539703, "learning_rate": 2e-07, "loss": 0.0349, "num_tokens": 63989309.0, "reward": 0.5290178656578064, "reward_std": 0.2125674933195114, "rewards/simpleverify_reward/mean": 0.5290178656578064, "rewards/simpleverify_reward/std": 0.49943605065345764, "step": 101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2124.0, "completions/mean_length": 569.2589721679688, "completions/mean_terminated_length": 541.4893188476562, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.0095199762000595, "grad_norm": 0.13029839098453522, "learning_rate": 2e-07, "loss": 0.0015, "num_tokens": 64591549.0, "reward": 0.6049107313156128, "reward_std": 0.21996904909610748, "rewards/simpleverify_reward/mean": 0.6049107313156128, "rewards/simpleverify_reward/std": 0.48914292454719543, "step": 102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 612.2199096679688, "completions/mean_terminated_length": 560.9297485351562, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.009613309300060083, "grad_norm": 0.1410360187292099, "learning_rate": 2e-07, "loss": 0.0136, "num_tokens": 65230730.0, "reward": 0.5290178656578064, "reward_std": 0.26076576113700867, "rewards/simpleverify_reward/mean": 0.5290178656578064, "rewards/simpleverify_reward/std": 0.49943605065345764, "step": 103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3805.0, "completions/mean_length": 651.4420166015625, "completions/mean_terminated_length": 572.799072265625, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.009706642400060667, "grad_norm": 0.12718802690505981, "learning_rate": 2e-07, "loss": 0.036, "num_tokens": 65902358.0, "reward": 0.5234375, "reward_std": 0.2372158020734787, "rewards/simpleverify_reward/mean": 0.5234375, "rewards/simpleverify_reward/std": 0.49972933530807495, "step": 104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3843.0, "completions/mean_length": 689.8002319335938, "completions/mean_terminated_length": 612.0330810546875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.00979997550006125, "grad_norm": 0.11132699251174927, "learning_rate": 2e-07, "loss": 0.0086, "num_tokens": 66608187.0, "reward": 0.4966517984867096, "reward_std": 0.20572522282600403, "rewards/simpleverify_reward/mean": 0.4966517984867096, "rewards/simpleverify_reward/std": 0.5002680420875549, "step": 105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3789.0, "completions/mean_length": 639.84375, "completions/mean_terminated_length": 577.0045166015625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.009893308600061834, "grad_norm": 0.1305176466703415, "learning_rate": 2e-07, "loss": 0.0233, "num_tokens": 67275903.0, "reward": 0.5290178656578064, "reward_std": 0.22770215570926666, "rewards/simpleverify_reward/mean": 0.5290178656578064, "rewards/simpleverify_reward/std": 0.49943605065345764, "step": 106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4029.0, "completions/mean_length": 593.65625, "completions/mean_terminated_length": 562.1036376953125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.009986641700062417, "grad_norm": 0.124730683863163, "learning_rate": 2e-07, "loss": 0.0194, "num_tokens": 67899123.0, "reward": 0.5636160969734192, "reward_std": 0.22048786282539368, "rewards/simpleverify_reward/mean": 0.5636160969734192, "rewards/simpleverify_reward/std": 0.49621346592903137, "step": 107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4057.0, "completions/mean_length": 574.7857666015625, "completions/mean_terminated_length": 543.0630493164062, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.010079974800063, "grad_norm": 0.13059444725513458, "learning_rate": 2e-07, "loss": 0.0273, "num_tokens": 68504539.0, "reward": 0.543526828289032, "reward_std": 0.23143410682678223, "rewards/simpleverify_reward/mean": 0.5435267686843872, "rewards/simpleverify_reward/std": 0.49838000535964966, "step": 108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3806.0, "completions/mean_length": 673.9342041015625, "completions/mean_terminated_length": 607.7508544921875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.010173307900063584, "grad_norm": 0.10617094486951828, "learning_rate": 2e-07, "loss": 0.0255, "num_tokens": 69198240.0, "reward": 0.5267857313156128, "reward_std": 0.19953122735023499, "rewards/simpleverify_reward/mean": 0.5267857313156128, "rewards/simpleverify_reward/std": 0.4995608627796173, "step": 109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2616.0, "completions/mean_length": 552.2299194335938, "completions/mean_terminated_length": 520.3040771484375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.010266641000064166, "grad_norm": 0.12546810507774353, "learning_rate": 2e-07, "loss": 0.0211, "num_tokens": 69774670.0, "reward": 0.6183035969734192, "reward_std": 0.22465601563453674, "rewards/simpleverify_reward/mean": 0.6183035969734192, "rewards/simpleverify_reward/std": 0.4860740303993225, "step": 110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2560.0, "completions/mean_length": 587.109375, "completions/mean_terminated_length": 539.4774169921875, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.010359974100064749, "grad_norm": 0.1288343369960785, "learning_rate": 2e-07, "loss": 0.0237, "num_tokens": 70388272.0, "reward": 0.5546875, "reward_std": 0.21319982409477234, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2696.0, "completions/mean_length": 611.3136596679688, "completions/mean_terminated_length": 571.9830932617188, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 0.010453307200065333, "grad_norm": 0.11703980714082718, "learning_rate": 2e-07, "loss": 0.029, "num_tokens": 71027473.0, "reward": 0.5245535969734192, "reward_std": 0.2191316783428192, "rewards/simpleverify_reward/mean": 0.5245535969734192, "rewards/simpleverify_reward/std": 0.4996756613254547, "step": 112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2278.0, "completions/mean_length": 596.7277221679688, "completions/mean_terminated_length": 545.20947265625, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 0.010546640300065916, "grad_norm": 0.11673013120889664, "learning_rate": 2e-07, "loss": 0.0265, "num_tokens": 71658061.0, "reward": 0.5256696939468384, "reward_std": 0.19200216233730316, "rewards/simpleverify_reward/mean": 0.5256696343421936, "rewards/simpleverify_reward/std": 0.4996195137500763, "step": 113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2235.0, "completions/mean_length": 546.3660888671875, "completions/mean_terminated_length": 510.3494567871094, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.0106399734000665, "grad_norm": 0.1305750012397766, "learning_rate": 2e-07, "loss": 0.0251, "num_tokens": 72238797.0, "reward": 0.5580357313156128, "reward_std": 0.24547138810157776, "rewards/simpleverify_reward/mean": 0.5580357313156128, "rewards/simpleverify_reward/std": 0.49689781665802, "step": 114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3870.0, "completions/mean_length": 605.1506958007812, "completions/mean_terminated_length": 581.6168823242188, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.010733306500067083, "grad_norm": 0.12955506145954132, "learning_rate": 2e-07, "loss": 0.0332, "num_tokens": 72875372.0, "reward": 0.5491071939468384, "reward_std": 0.23394006490707397, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2313.0, "completions/mean_length": 514.6395263671875, "completions/mean_terminated_length": 482.375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.010826639600067666, "grad_norm": 0.13708357512950897, "learning_rate": 2e-07, "loss": 0.0178, "num_tokens": 73421361.0, "reward": 0.6573660969734192, "reward_std": 0.18581029772758484, "rewards/simpleverify_reward/mean": 0.6573660969734192, "rewards/simpleverify_reward/std": 0.47485533356666565, "step": 116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3617.0, "completions/mean_length": 579.3035888671875, "completions/mean_terminated_length": 539.6117553710938, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 0.01091997270006825, "grad_norm": 0.13148626685142517, "learning_rate": 2e-07, "loss": 0.0327, "num_tokens": 74032449.0, "reward": 0.5479910969734192, "reward_std": 0.20790556073188782, "rewards/simpleverify_reward/mean": 0.5479910969734192, "rewards/simpleverify_reward/std": 0.49796950817108154, "step": 117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2512.0, "completions/mean_length": 597.2857666015625, "completions/mean_terminated_length": 541.7505493164062, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.011013305800068833, "grad_norm": 0.12842407822608948, "learning_rate": 2e-07, "loss": 0.0183, "num_tokens": 74649025.0, "reward": 0.5959821939468384, "reward_std": 0.22503580152988434, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2261.0, "completions/mean_length": 566.8392944335938, "completions/mean_terminated_length": 535.0450439453125, "completions/min_length": 64.0, "completions/min_terminated_length": 64.0, "epoch": 0.011106638900069417, "grad_norm": 0.12525783479213715, "learning_rate": 2e-07, "loss": 0.0205, "num_tokens": 75238705.0, "reward": 0.5345982313156128, "reward_std": 0.2131231427192688, "rewards/simpleverify_reward/mean": 0.5345982313156128, "rewards/simpleverify_reward/std": 0.4990801215171814, "step": 119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 1967.0, "completions/mean_length": 603.7890625, "completions/mean_terminated_length": 560.383056640625, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.01119997200007, "grad_norm": 0.13363763689994812, "learning_rate": 2e-07, "loss": 0.0077, "num_tokens": 75865852.0, "reward": 0.5558035969734192, "reward_std": 0.2386358678340912, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715372920036316, "step": 120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2479.0, "completions/mean_length": 577.8225708007812, "completions/mean_terminated_length": 534.0938110351562, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.011293305100070584, "grad_norm": 0.12736153602600098, "learning_rate": 2e-07, "loss": 0.0326, "num_tokens": 76471325.0, "reward": 0.5401785969734192, "reward_std": 0.20692914724349976, "rewards/simpleverify_reward/mean": 0.5401785969734192, "rewards/simpleverify_reward/std": 0.49866142868995667, "step": 121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3337.0, "completions/mean_length": 660.2332763671875, "completions/mean_terminated_length": 573.7493896484375, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.011386638200071167, "grad_norm": 0.11117804050445557, "learning_rate": 2e-07, "loss": 0.0396, "num_tokens": 77152366.0, "reward": 0.5022321939468384, "reward_std": 0.19873958826065063, "rewards/simpleverify_reward/mean": 0.5022321343421936, "rewards/simpleverify_reward/std": 0.5002743005752563, "step": 122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3957.0, "completions/mean_length": 599.9085083007812, "completions/mean_terminated_length": 560.44921875, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.011479971300071749, "grad_norm": 0.11935939639806747, "learning_rate": 2e-07, "loss": 0.057, "num_tokens": 77777420.0, "reward": 0.5424107313156128, "reward_std": 0.22732238471508026, "rewards/simpleverify_reward/mean": 0.5424107313156128, "rewards/simpleverify_reward/std": 0.4984763264656067, "step": 123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4032.0, "completions/mean_length": 640.9241333007812, "completions/mean_terminated_length": 590.056640625, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.011573304400072332, "grad_norm": 0.1191592887043953, "learning_rate": 2e-07, "loss": 0.0272, "num_tokens": 78460576.0, "reward": 0.5022321939468384, "reward_std": 0.21132442355155945, "rewards/simpleverify_reward/mean": 0.5022321343421936, "rewards/simpleverify_reward/std": 0.5002742409706116, "step": 124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3822.0, "completions/mean_length": 611.8114013671875, "completions/mean_terminated_length": 580.4223022460938, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.011666637500072916, "grad_norm": 0.10703187435865402, "learning_rate": 2e-07, "loss": 0.0158, "num_tokens": 79101823.0, "reward": 0.559151828289032, "reward_std": 0.20098592340946198, "rewards/simpleverify_reward/mean": 0.5591517686843872, "rewards/simpleverify_reward/std": 0.496766060590744, "step": 125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3374.0, "completions/mean_length": 566.950927734375, "completions/mean_terminated_length": 514.9943237304688, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "epoch": 0.0117599706000735, "grad_norm": 0.13137181103229523, "learning_rate": 2e-07, "loss": 0.0373, "num_tokens": 79685635.0, "reward": 0.6261160969734192, "reward_std": 0.22721292078495026, "rewards/simpleverify_reward/mean": 0.6261160969734192, "rewards/simpleverify_reward/std": 0.48410359025001526, "step": 126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3653.0, "completions/mean_length": 651.5647583007812, "completions/mean_terminated_length": 556.7637329101562, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.011853303700074083, "grad_norm": 0.12269758433103561, "learning_rate": 2e-07, "loss": 0.0275, "num_tokens": 80348997.0, "reward": 0.535714328289032, "reward_std": 0.21658408641815186, "rewards/simpleverify_reward/mean": 0.5357142686843872, "rewards/simpleverify_reward/std": 0.4990014135837555, "step": 127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3958.0, "completions/mean_length": 601.6886596679688, "completions/mean_terminated_length": 562.2494506835938, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.011946636800074666, "grad_norm": 0.12361564487218857, "learning_rate": 2e-07, "loss": -0.0029, "num_tokens": 80981518.0, "reward": 0.5345982313156128, "reward_std": 0.19569022953510284, "rewards/simpleverify_reward/mean": 0.5345982313156128, "rewards/simpleverify_reward/std": 0.4990801215171814, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3002.0, "completions/mean_length": 612.833740234375, "completions/mean_terminated_length": 537.3717041015625, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 0.01203996990007525, "grad_norm": 0.12123405188322067, "learning_rate": 2e-07, "loss": 0.0218, "num_tokens": 81619225.0, "reward": 0.5625, "reward_std": 0.19787125289440155, "rewards/simpleverify_reward/mean": 0.5625, "rewards/simpleverify_reward/std": 0.49635544419288635, "step": 129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3345.0, "completions/mean_length": 662.7689819335938, "completions/mean_terminated_length": 592.3838500976562, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.012133303000075833, "grad_norm": 0.10080352425575256, "learning_rate": 2e-07, "loss": 0.0205, "num_tokens": 82309594.0, "reward": 0.5189732313156128, "reward_std": 0.1665322184562683, "rewards/simpleverify_reward/mean": 0.5189732313156128, "rewards/simpleverify_reward/std": 0.49991893768310547, "step": 130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3718.0, "completions/mean_length": 586.6908569335938, "completions/mean_terminated_length": 535.02490234375, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.012226636100076417, "grad_norm": 0.1188618615269661, "learning_rate": 2e-07, "loss": 0.0456, "num_tokens": 82934749.0, "reward": 0.4977678656578064, "reward_std": 0.19035597145557404, "rewards/simpleverify_reward/mean": 0.4977678656578064, "rewards/simpleverify_reward/std": 0.5002742409706116, "step": 131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2257.0, "completions/mean_length": 540.6217041015625, "completions/mean_terminated_length": 520.6700439453125, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.012319969200077, "grad_norm": 0.12831594049930573, "learning_rate": 2e-07, "loss": 0.0185, "num_tokens": 83510426.0, "reward": 0.6004464626312256, "reward_std": 0.23244258761405945, "rewards/simpleverify_reward/mean": 0.6004464030265808, "rewards/simpleverify_reward/std": 0.49008017778396606, "step": 132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2412.0, "completions/mean_length": 620.3326416015625, "completions/mean_terminated_length": 545.0330810546875, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 0.012413302300077584, "grad_norm": 0.12372425198554993, "learning_rate": 2e-07, "loss": 0.0344, "num_tokens": 84155868.0, "reward": 0.504464328289032, "reward_std": 0.22198855876922607, "rewards/simpleverify_reward/mean": 0.5044642686843872, "rewards/simpleverify_reward/std": 0.5002593398094177, "step": 133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3894.0, "completions/mean_length": 634.9810791015625, "completions/mean_terminated_length": 559.9988403320312, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.012506635400078167, "grad_norm": 0.1294793337583542, "learning_rate": 2e-07, "loss": 0.0325, "num_tokens": 84818195.0, "reward": 0.5290178656578064, "reward_std": 0.24930886924266815, "rewards/simpleverify_reward/mean": 0.5290178656578064, "rewards/simpleverify_reward/std": 0.49943602085113525, "step": 134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3851.0, "completions/mean_length": 662.208740234375, "completions/mean_terminated_length": 603.74462890625, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 0.01259996850007875, "grad_norm": 0.11795096099376678, "learning_rate": 2e-07, "loss": 0.0377, "num_tokens": 85502622.0, "reward": 0.5558035969734192, "reward_std": 0.2253798544406891, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715372920036316, "step": 135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3227.0, "completions/mean_length": 592.5881958007812, "completions/mean_terminated_length": 549.04296875, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.012693301600079332, "grad_norm": 0.11853031069040298, "learning_rate": 2e-07, "loss": 0.0297, "num_tokens": 86118581.0, "reward": 0.5725446939468384, "reward_std": 0.19835911691188812, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3090.0, "completions/mean_length": 617.989990234375, "completions/mean_terminated_length": 546.6868286132812, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.012786634700079916, "grad_norm": 0.1265747845172882, "learning_rate": 2e-07, "loss": 0.0206, "num_tokens": 86758996.0, "reward": 0.5390625, "reward_std": 0.20775583386421204, "rewards/simpleverify_reward/mean": 0.5390625, "rewards/simpleverify_reward/std": 0.4987502098083496, "step": 137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 652.5502319335938, "completions/mean_terminated_length": 605.8065795898438, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.0128799678000805, "grad_norm": 0.11794104427099228, "learning_rate": 2e-07, "loss": 0.0399, "num_tokens": 87432089.0, "reward": 0.5290178656578064, "reward_std": 0.21106228232383728, "rewards/simpleverify_reward/mean": 0.5290178656578064, "rewards/simpleverify_reward/std": 0.49943602085113525, "step": 138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3650.0, "completions/mean_length": 585.997802734375, "completions/mean_terminated_length": 542.37060546875, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.012973300900081083, "grad_norm": 0.12881088256835938, "learning_rate": 2e-07, "loss": 0.0197, "num_tokens": 88041527.0, "reward": 0.5446428656578064, "reward_std": 0.22142964601516724, "rewards/simpleverify_reward/mean": 0.5446428656578064, "rewards/simpleverify_reward/std": 0.4982811510562897, "step": 139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3107.0, "completions/mean_length": 644.7254638671875, "completions/mean_terminated_length": 573.9703979492188, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.013066634000081666, "grad_norm": 0.11491694301366806, "learning_rate": 2e-07, "loss": 0.0433, "num_tokens": 88709489.0, "reward": 0.5479910969734192, "reward_std": 0.21117176115512848, "rewards/simpleverify_reward/mean": 0.5479910969734192, "rewards/simpleverify_reward/std": 0.49796950817108154, "step": 140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2766.0, "completions/mean_length": 561.333740234375, "completions/mean_terminated_length": 533.501708984375, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.01315996710008225, "grad_norm": 0.12250196188688278, "learning_rate": 2e-07, "loss": 0.0226, "num_tokens": 89295932.0, "reward": 0.578125, "reward_std": 0.18878155946731567, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3910.0, "completions/mean_length": 668.4129638671875, "completions/mean_terminated_length": 598.1435546875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.013253300200082833, "grad_norm": 0.10875729471445084, "learning_rate": 2e-07, "loss": 0.004, "num_tokens": 89983518.0, "reward": 0.5647321939468384, "reward_std": 0.20917478203773499, "rewards/simpleverify_reward/mean": 0.5647321343421936, "rewards/simpleverify_reward/std": 0.49606895446777344, "step": 142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3995.0, "completions/mean_length": 653.5402221679688, "completions/mean_terminated_length": 598.89794921875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.013346633300083417, "grad_norm": 0.10802579671144485, "learning_rate": 2e-07, "loss": 0.0208, "num_tokens": 90658394.0, "reward": 0.527901828289032, "reward_std": 0.22331123054027557, "rewards/simpleverify_reward/mean": 0.5279017686843872, "rewards/simpleverify_reward/std": 0.49949970841407776, "step": 143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3209.0, "completions/mean_length": 589.3717041015625, "completions/mean_terminated_length": 541.7703857421875, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.013439966400084, "grad_norm": 0.12542743980884552, "learning_rate": 2e-07, "loss": 0.029, "num_tokens": 91276727.0, "reward": 0.578125, "reward_std": 0.2150445580482483, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3736.0, "completions/mean_length": 597.2835083007812, "completions/mean_terminated_length": 565.7635498046875, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 0.013533299500084583, "grad_norm": 0.11318818479776382, "learning_rate": 2e-07, "loss": 0.0244, "num_tokens": 91901221.0, "reward": 0.551339328289032, "reward_std": 0.18565621972084045, "rewards/simpleverify_reward/mean": 0.5513392686843872, "rewards/simpleverify_reward/std": 0.4976350665092468, "step": 145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3460.0, "completions/mean_length": 630.546875, "completions/mean_terminated_length": 575.5396728515625, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.013626632600085167, "grad_norm": 0.12216174602508545, "learning_rate": 2e-07, "loss": 0.0384, "num_tokens": 92551751.0, "reward": 0.5859375, "reward_std": 0.22112837433815002, "rewards/simpleverify_reward/mean": 0.5859375, "rewards/simpleverify_reward/std": 0.4928344786167145, "step": 146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3841.0, "completions/mean_length": 596.3761596679688, "completions/mean_terminated_length": 548.8699340820312, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.01371996570008575, "grad_norm": 0.13053810596466064, "learning_rate": 2e-07, "loss": 0.0259, "num_tokens": 93172448.0, "reward": 0.5736607313156128, "reward_std": 0.24359667301177979, "rewards/simpleverify_reward/mean": 0.5736607313156128, "rewards/simpleverify_reward/std": 0.4948205351829529, "step": 147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3881.0, "completions/mean_length": 585.5011596679688, "completions/mean_terminated_length": 521.673828125, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.013813298800086334, "grad_norm": 0.12775394320487976, "learning_rate": 2e-07, "loss": 0.0361, "num_tokens": 93797209.0, "reward": 0.5167410969734192, "reward_std": 0.2116268128156662, "rewards/simpleverify_reward/mean": 0.5167410969734192, "rewards/simpleverify_reward/std": 0.4999987483024597, "step": 148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2840.0, "completions/mean_length": 581.5067138671875, "completions/mean_terminated_length": 533.7986450195312, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.013906631900086916, "grad_norm": 0.13133786618709564, "learning_rate": 2e-07, "loss": 0.0264, "num_tokens": 94411143.0, "reward": 0.5647321939468384, "reward_std": 0.22890567779541016, "rewards/simpleverify_reward/mean": 0.5647321343421936, "rewards/simpleverify_reward/std": 0.49606895446777344, "step": 149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2728.0, "completions/mean_length": 580.2935791015625, "completions/mean_terminated_length": 548.6204833984375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.013999965000087499, "grad_norm": 0.11838987469673157, "learning_rate": 2e-07, "loss": 0.0332, "num_tokens": 95023510.0, "reward": 0.5625, "reward_std": 0.18047182261943817, "rewards/simpleverify_reward/mean": 0.5625, "rewards/simpleverify_reward/std": 0.49635544419288635, "step": 150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3092.0, "completions/mean_length": 590.1261596679688, "completions/mean_terminated_length": 566.4910278320312, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.014093298100088083, "grad_norm": 0.12253440171480179, "learning_rate": 2e-07, "loss": 0.0139, "num_tokens": 95646671.0, "reward": 0.559151828289032, "reward_std": 0.2188713699579239, "rewards/simpleverify_reward/mean": 0.5591517686843872, "rewards/simpleverify_reward/std": 0.496766060590744, "step": 151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2614.0, "completions/mean_length": 610.0926513671875, "completions/mean_terminated_length": 574.72265625, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.014186631200088666, "grad_norm": 0.12554332613945007, "learning_rate": 2e-07, "loss": 0.0252, "num_tokens": 96278202.0, "reward": 0.5926339626312256, "reward_std": 0.22113613784313202, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3900.0, "completions/mean_length": 573.0424194335938, "completions/mean_terminated_length": 525.219482421875, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.01427996430008925, "grad_norm": 0.1310548037290573, "learning_rate": 2e-07, "loss": 0.0131, "num_tokens": 96880488.0, "reward": 0.621651828289032, "reward_std": 0.24394112825393677, "rewards/simpleverify_reward/mean": 0.6216517686843872, "rewards/simpleverify_reward/std": 0.4852459728717804, "step": 153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2061.0, "completions/mean_length": 608.3795166015625, "completions/mean_terminated_length": 569.0158081054688, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.014373297400089833, "grad_norm": 0.13412144780158997, "learning_rate": 2e-07, "loss": 0.0208, "num_tokens": 97515708.0, "reward": 0.5345982313156128, "reward_std": 0.24202406406402588, "rewards/simpleverify_reward/mean": 0.5345982313156128, "rewards/simpleverify_reward/std": 0.4990801215171814, "step": 154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3250.0, "completions/mean_length": 566.3717041015625, "completions/mean_terminated_length": 526.5338745117188, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.014466630500090416, "grad_norm": 0.12922994792461395, "learning_rate": 2e-07, "loss": 0.0149, "num_tokens": 98114905.0, "reward": 0.5502232313156128, "reward_std": 0.22675783932209015, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2038.0, "completions/mean_length": 549.4342041015625, "completions/mean_terminated_length": 517.4830932617188, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.014559963600091, "grad_norm": 0.13988733291625977, "learning_rate": 2e-07, "loss": 0.0261, "num_tokens": 98705454.0, "reward": 0.5770089626312256, "reward_std": 0.2370203733444214, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099617958069, "step": 156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3766.0, "completions/mean_length": 641.8560791015625, "completions/mean_terminated_length": 587.0283813476562, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.014653296700091583, "grad_norm": 0.11424431949853897, "learning_rate": 2e-07, "loss": 0.0182, "num_tokens": 99376005.0, "reward": 0.515625, "reward_std": 0.23596841096878052, "rewards/simpleverify_reward/mean": 0.515625, "rewards/simpleverify_reward/std": 0.5000349283218384, "step": 157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2532.0, "completions/mean_length": 680.5067138671875, "completions/mean_terminated_length": 614.4505004882812, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.014746629800092167, "grad_norm": 0.11115574836730957, "learning_rate": 2e-07, "loss": 0.0306, "num_tokens": 100067787.0, "reward": 0.5022321939468384, "reward_std": 0.22628279030323029, "rewards/simpleverify_reward/mean": 0.5022321343421936, "rewards/simpleverify_reward/std": 0.5002742409706116, "step": 158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2623.0, "completions/mean_length": 577.5826416015625, "completions/mean_terminated_length": 537.871337890625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.01483996290009275, "grad_norm": 0.11850984394550323, "learning_rate": 2e-07, "loss": 0.0118, "num_tokens": 100676549.0, "reward": 0.5725446939468384, "reward_std": 0.1962568610906601, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3814.0, "completions/mean_length": 673.3928833007812, "completions/mean_terminated_length": 562.9861450195312, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.014933296000093334, "grad_norm": 0.11845435202121735, "learning_rate": 2e-07, "loss": 0.0452, "num_tokens": 101373629.0, "reward": 0.455357164144516, "reward_std": 0.21643072366714478, "rewards/simpleverify_reward/mean": 0.4553571343421936, "rewards/simpleverify_reward/std": 0.49828118085861206, "step": 160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2264.0, "completions/mean_length": 609.4129638671875, "completions/mean_terminated_length": 558.08154296875, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.015026629100093917, "grad_norm": 0.12154609709978104, "learning_rate": 2e-07, "loss": 0.0273, "num_tokens": 101999511.0, "reward": 0.5714285969734192, "reward_std": 0.203317791223526, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514803290367126, "step": 161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2989.0, "completions/mean_length": 602.7332763671875, "completions/mean_terminated_length": 543.2565307617188, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.015119962200094499, "grad_norm": 0.1252088099718094, "learning_rate": 2e-07, "loss": 0.0151, "num_tokens": 102623344.0, "reward": 0.5491071939468384, "reward_std": 0.19745828211307526, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4065.0, "completions/mean_length": 614.1495971679688, "completions/mean_terminated_length": 582.7815551757812, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.015213295300095082, "grad_norm": 0.11403872072696686, "learning_rate": 2e-07, "loss": 0.0172, "num_tokens": 103261310.0, "reward": 0.5368303656578064, "reward_std": 0.19012635946273804, "rewards/simpleverify_reward/mean": 0.5368303656578064, "rewards/simpleverify_reward/std": 0.49892017245292664, "step": 163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2375.0, "completions/mean_length": 629.6674194335938, "completions/mean_terminated_length": 590.5440063476562, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.015306628400095666, "grad_norm": 0.11468911170959473, "learning_rate": 2e-07, "loss": 0.0176, "num_tokens": 103917892.0, "reward": 0.5022321939468384, "reward_std": 0.20977209508419037, "rewards/simpleverify_reward/mean": 0.5022321343421936, "rewards/simpleverify_reward/std": 0.5002742409706116, "step": 164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2499.0, "completions/mean_length": 553.421875, "completions/mean_terminated_length": 533.5421142578125, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.01539996150009625, "grad_norm": 0.12357461452484131, "learning_rate": 2e-07, "loss": 0.0202, "num_tokens": 104521398.0, "reward": 0.5881696939468384, "reward_std": 0.1855902224779129, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924395978450775, "step": 165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3820.0, "completions/mean_length": 619.044677734375, "completions/mean_terminated_length": 567.8550415039062, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.015493294600096833, "grad_norm": 0.12371724843978882, "learning_rate": 2e-07, "loss": 0.0334, "num_tokens": 105161862.0, "reward": 0.578125, "reward_std": 0.2303275465965271, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3179.0, "completions/mean_length": 660.341552734375, "completions/mean_terminated_length": 605.8072509765625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.015586627700097416, "grad_norm": 0.10843689739704132, "learning_rate": 2e-07, "loss": 0.0135, "num_tokens": 105843040.0, "reward": 0.5546875, "reward_std": 0.1680731475353241, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3406.0, "completions/mean_length": 674.9163208007812, "completions/mean_terminated_length": 620.6134033203125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.015679960800098, "grad_norm": 0.11655478179454803, "learning_rate": 2e-07, "loss": 0.0506, "num_tokens": 106534693.0, "reward": 0.535714328289032, "reward_std": 0.23052188754081726, "rewards/simpleverify_reward/mean": 0.5357142686843872, "rewards/simpleverify_reward/std": 0.4990014135837555, "step": 168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 1918.0, "completions/mean_length": 608.6049194335938, "completions/mean_terminated_length": 557.2615966796875, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 0.015773293900098583, "grad_norm": 0.12603414058685303, "learning_rate": 2e-07, "loss": 0.0154, "num_tokens": 107162139.0, "reward": 0.5535714626312256, "reward_std": 0.2422497719526291, "rewards/simpleverify_reward/mean": 0.5535714030265808, "rewards/simpleverify_reward/std": 0.4973995089530945, "step": 169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2405.0, "completions/mean_length": 631.2689819335938, "completions/mean_terminated_length": 596.1138305664062, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.015866627000099165, "grad_norm": 0.10595300793647766, "learning_rate": 2e-07, "loss": 0.0338, "num_tokens": 107835364.0, "reward": 0.4587053656578064, "reward_std": 0.1730746179819107, "rewards/simpleverify_reward/mean": 0.4587053656578064, "rewards/simpleverify_reward/std": 0.49857014417648315, "step": 170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3792.0, "completions/mean_length": 665.1529541015625, "completions/mean_terminated_length": 618.580322265625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.01595996010009975, "grad_norm": 0.1036030724644661, "learning_rate": 2e-07, "loss": 0.0249, "num_tokens": 108514933.0, "reward": 0.59375, "reward_std": 0.1658935248851776, "rewards/simpleverify_reward/mean": 0.59375, "rewards/simpleverify_reward/std": 0.4914066195487976, "step": 171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3709.0, "completions/mean_length": 618.4342041015625, "completions/mean_terminated_length": 555.2056884765625, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.016053293200100332, "grad_norm": 0.11602609604597092, "learning_rate": 2e-07, "loss": 0.0123, "num_tokens": 109162754.0, "reward": 0.5424107313156128, "reward_std": 0.20286384224891663, "rewards/simpleverify_reward/mean": 0.5424107313156128, "rewards/simpleverify_reward/std": 0.4984763264656067, "step": 172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3898.0, "completions/mean_length": 609.075927734375, "completions/mean_terminated_length": 549.7071533203125, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.016146626300100917, "grad_norm": 0.12419219315052032, "learning_rate": 2e-07, "loss": 0.0354, "num_tokens": 109790398.0, "reward": 0.5457589626312256, "reward_std": 0.20895794034004211, "rewards/simpleverify_reward/mean": 0.5457589030265808, "rewards/simpleverify_reward/std": 0.4981798231601715, "step": 173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2802.0, "completions/mean_length": 574.5491333007812, "completions/mean_terminated_length": 514.592529296875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.0162399594001015, "grad_norm": 0.12758852541446686, "learning_rate": 2e-07, "loss": 0.0187, "num_tokens": 110388258.0, "reward": 0.6171875, "reward_std": 0.2085471898317337, "rewards/simpleverify_reward/mean": 0.6171875, "rewards/simpleverify_reward/std": 0.4863446056842804, "step": 174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4045.0, "completions/mean_length": 641.177490234375, "completions/mean_terminated_length": 546.090576171875, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.016333292500102084, "grad_norm": 0.13298849761486053, "learning_rate": 2e-07, "loss": 0.0272, "num_tokens": 111054305.0, "reward": 0.5390625, "reward_std": 0.2369908094406128, "rewards/simpleverify_reward/mean": 0.5390625, "rewards/simpleverify_reward/std": 0.4987502098083496, "step": 175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3836.0, "completions/mean_length": 636.8850708007812, "completions/mean_terminated_length": 561.944091796875, "completions/min_length": 73.0, "completions/min_terminated_length": 73.0, "epoch": 0.016426625600102666, "grad_norm": 0.12455269694328308, "learning_rate": 2e-07, "loss": 0.0163, "num_tokens": 111717842.0, "reward": 0.5870535969734192, "reward_std": 0.22800664603710175, "rewards/simpleverify_reward/mean": 0.5870535969734192, "rewards/simpleverify_reward/std": 0.49263837933540344, "step": 176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2543.0, "completions/mean_length": 589.7232666015625, "completions/mean_terminated_length": 546.1423950195312, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.01651995870010325, "grad_norm": 0.1278800368309021, "learning_rate": 2e-07, "loss": 0.0093, "num_tokens": 112336722.0, "reward": 0.5803571939468384, "reward_std": 0.2363061010837555, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761127948761, "step": 177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4066.0, "completions/mean_length": 614.5569458007812, "completions/mean_terminated_length": 587.1439819335938, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.016613291800103833, "grad_norm": 0.1255771517753601, "learning_rate": 2e-07, "loss": 0.017, "num_tokens": 112976413.0, "reward": 0.5167410969734192, "reward_std": 0.24867017567157745, "rewards/simpleverify_reward/mean": 0.5167410969734192, "rewards/simpleverify_reward/std": 0.4999987483024597, "step": 178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2625.0, "completions/mean_length": 647.2355346679688, "completions/mean_terminated_length": 584.5306396484375, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.016706624900104418, "grad_norm": 0.12528231739997864, "learning_rate": 2e-07, "loss": 0.0327, "num_tokens": 113658320.0, "reward": 0.5078125, "reward_std": 0.25227126479148865, "rewards/simpleverify_reward/mean": 0.5078125, "rewards/simpleverify_reward/std": 0.5002182126045227, "step": 179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 1805.0, "completions/mean_length": 630.2611694335938, "completions/mean_terminated_length": 575.2494506835938, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.016799958000105, "grad_norm": 0.11826111376285553, "learning_rate": 2e-07, "loss": 0.0318, "num_tokens": 114330690.0, "reward": 0.5212053656578064, "reward_std": 0.21086867153644562, "rewards/simpleverify_reward/mean": 0.5212053656578064, "rewards/simpleverify_reward/std": 0.49982914328575134, "step": 180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2431.0, "completions/mean_length": 625.3248291015625, "completions/mean_terminated_length": 586.1524047851562, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.016893291100105585, "grad_norm": 0.11020992696285248, "learning_rate": 2e-07, "loss": 0.0145, "num_tokens": 114971661.0, "reward": 0.5736607313156128, "reward_std": 0.19166657328605652, "rewards/simpleverify_reward/mean": 0.5736607313156128, "rewards/simpleverify_reward/std": 0.4948205351829529, "step": 181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3687.0, "completions/mean_length": 574.935302734375, "completions/mean_terminated_length": 543.2139892578125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.016986624200106167, "grad_norm": 0.1311573088169098, "learning_rate": 2e-07, "loss": 0.0202, "num_tokens": 115572723.0, "reward": 0.5970982313156128, "reward_std": 0.24543607234954834, "rewards/simpleverify_reward/mean": 0.5970982313156128, "rewards/simpleverify_reward/std": 0.49075525999069214, "step": 182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2174.0, "completions/mean_length": 643.6495971679688, "completions/mean_terminated_length": 568.8551635742188, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 0.017079957300106748, "grad_norm": 0.12451576441526413, "learning_rate": 2e-07, "loss": 0.021, "num_tokens": 116234809.0, "reward": 0.5111607313156128, "reward_std": 0.21132300794124603, "rewards/simpleverify_reward/mean": 0.5111607313156128, "rewards/simpleverify_reward/std": 0.5001546144485474, "step": 183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4072.0, "completions/mean_length": 639.8404541015625, "completions/mean_terminated_length": 592.9242553710938, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 0.017173290400107333, "grad_norm": 0.1206892803311348, "learning_rate": 2e-07, "loss": 0.0289, "num_tokens": 116897026.0, "reward": 0.5055803656578064, "reward_std": 0.21797235310077667, "rewards/simpleverify_reward/mean": 0.5055803656578064, "rewards/simpleverify_reward/std": 0.5002480745315552, "step": 184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2315.0, "completions/mean_length": 567.3147583007812, "completions/mean_terminated_length": 531.5106811523438, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.017266623500107915, "grad_norm": 0.13165459036827087, "learning_rate": 2e-07, "loss": 0.0006, "num_tokens": 117501060.0, "reward": 0.559151828289032, "reward_std": 0.19768761098384857, "rewards/simpleverify_reward/mean": 0.5591517686843872, "rewards/simpleverify_reward/std": 0.496766060590744, "step": 185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3277.0, "completions/mean_length": 625.7824096679688, "completions/mean_terminated_length": 566.6981201171875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.0173599566001085, "grad_norm": 0.12957888841629028, "learning_rate": 2e-07, "loss": 0.0237, "num_tokens": 118141281.0, "reward": 0.5535714626312256, "reward_std": 0.23086068034172058, "rewards/simpleverify_reward/mean": 0.5535714030265808, "rewards/simpleverify_reward/std": 0.4973994493484497, "step": 186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3766.0, "completions/mean_length": 617.9721069335938, "completions/mean_terminated_length": 558.7548217773438, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.017453289700109082, "grad_norm": 0.11669562757015228, "learning_rate": 2e-07, "loss": 0.0402, "num_tokens": 118786472.0, "reward": 0.53125, "reward_std": 0.2019302248954773, "rewards/simpleverify_reward/mean": 0.53125, "rewards/simpleverify_reward/std": 0.4993011951446533, "step": 187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2885.0, "completions/mean_length": 617.677490234375, "completions/mean_terminated_length": 558.4552001953125, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.017546622800109667, "grad_norm": 0.12096396833658218, "learning_rate": 2e-07, "loss": 0.0171, "num_tokens": 119423119.0, "reward": 0.5915178656578064, "reward_std": 0.19448880851268768, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4050.0, "completions/mean_length": 625.7366333007812, "completions/mean_terminated_length": 562.640869140625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.01763995590011025, "grad_norm": 0.11733868718147278, "learning_rate": 2e-07, "loss": 0.02, "num_tokens": 120075419.0, "reward": 0.4966517984867096, "reward_std": 0.20692986249923706, "rewards/simpleverify_reward/mean": 0.4966517984867096, "rewards/simpleverify_reward/std": 0.5002680420875549, "step": 189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2298.0, "completions/mean_length": 623.8392944335938, "completions/mean_terminated_length": 572.7202758789062, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.017733289000110834, "grad_norm": 0.1129811480641365, "learning_rate": 2e-07, "loss": 0.0289, "num_tokens": 120733651.0, "reward": 0.5178571939468384, "reward_std": 0.21643072366714478, "rewards/simpleverify_reward/mean": 0.5178571343421936, "rewards/simpleverify_reward/std": 0.4999600946903229, "step": 190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3772.0, "completions/mean_length": 603.5033569335938, "completions/mean_terminated_length": 556.0939331054688, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.017826622100111416, "grad_norm": 0.13240930438041687, "learning_rate": 2e-07, "loss": 0.0307, "num_tokens": 121376846.0, "reward": 0.5412946939468384, "reward_std": 0.22837798297405243, "rewards/simpleverify_reward/mean": 0.5412946343421936, "rewards/simpleverify_reward/std": 0.49857014417648315, "step": 191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2700.0, "completions/mean_length": 579.3092041015625, "completions/mean_terminated_length": 543.6268310546875, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.017919955200112, "grad_norm": 0.12217244505882263, "learning_rate": 2e-07, "loss": 0.0113, "num_tokens": 121985563.0, "reward": 0.5803571939468384, "reward_std": 0.20828351378440857, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761425971985, "step": 192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2442.0, "completions/mean_length": 618.7265625, "completions/mean_terminated_length": 563.53173828125, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.018013288300112583, "grad_norm": 0.12152739614248276, "learning_rate": 2e-07, "loss": 0.0164, "num_tokens": 122624958.0, "reward": 0.53125, "reward_std": 0.21199730038642883, "rewards/simpleverify_reward/mean": 0.53125, "rewards/simpleverify_reward/std": 0.4993011951446533, "step": 193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4025.0, "completions/mean_length": 648.193115234375, "completions/mean_terminated_length": 577.5091552734375, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.018106621400113165, "grad_norm": 0.11923814564943314, "learning_rate": 2e-07, "loss": 0.0364, "num_tokens": 123299579.0, "reward": 0.5602678656578064, "reward_std": 0.20339195430278778, "rewards/simpleverify_reward/mean": 0.5602678656578064, "rewards/simpleverify_reward/std": 0.4966317117214203, "step": 194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3991.0, "completions/mean_length": 625.1317138671875, "completions/mean_terminated_length": 605.6543579101562, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.01819995450011375, "grad_norm": 0.1181858628988266, "learning_rate": 2e-07, "loss": 0.0138, "num_tokens": 123956497.0, "reward": 0.5334821939468384, "reward_std": 0.21507525444030762, "rewards/simpleverify_reward/mean": 0.5334821343421936, "rewards/simpleverify_reward/std": 0.49915632605552673, "step": 195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3463.0, "completions/mean_length": 622.4754638671875, "completions/mean_terminated_length": 559.3204345703125, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.01829328760011433, "grad_norm": 0.12871982157230377, "learning_rate": 2e-07, "loss": 0.0404, "num_tokens": 124601795.0, "reward": 0.5524553656578064, "reward_std": 0.24705366790294647, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2109.0, "completions/mean_length": 571.03125, "completions/mean_terminated_length": 523.1810302734375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.018386620700114917, "grad_norm": 0.128322035074234, "learning_rate": 2e-07, "loss": 0.0255, "num_tokens": 125213103.0, "reward": 0.6026785969734192, "reward_std": 0.21147273480892181, "rewards/simpleverify_reward/mean": 0.6026785969734192, "rewards/simpleverify_reward/std": 0.48961687088012695, "step": 197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3239.0, "completions/mean_length": 665.6819458007812, "completions/mean_terminated_length": 591.3648681640625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.0184799538001155, "grad_norm": 0.10308924317359924, "learning_rate": 2e-07, "loss": 0.0308, "num_tokens": 125902738.0, "reward": 0.5736607313156128, "reward_std": 0.1840757429599762, "rewards/simpleverify_reward/mean": 0.5736607313156128, "rewards/simpleverify_reward/std": 0.4948205351829529, "step": 198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3071.0, "completions/mean_length": 647.8326416015625, "completions/mean_terminated_length": 585.1386108398438, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.018573286900116084, "grad_norm": 0.11584942787885666, "learning_rate": 2e-07, "loss": 0.0167, "num_tokens": 126574900.0, "reward": 0.504464328289032, "reward_std": 0.19310013949871063, "rewards/simpleverify_reward/mean": 0.5044642686843872, "rewards/simpleverify_reward/std": 0.5002593398094177, "step": 199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 623.9486694335938, "completions/mean_terminated_length": 556.7986450195312, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.018666620000116665, "grad_norm": 0.11971444636583328, "learning_rate": 2e-07, "loss": 0.025, "num_tokens": 127225686.0, "reward": 0.5915178656578064, "reward_std": 0.21790635585784912, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2799.0, "completions/mean_length": 573.1674194335938, "completions/mean_terminated_length": 529.3807983398438, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.01875995310011725, "grad_norm": 0.11861925572156906, "learning_rate": 2e-07, "loss": 0.0183, "num_tokens": 127821684.0, "reward": 0.5714285969734192, "reward_std": 0.18829552829265594, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514803290367126, "step": 201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3630.0, "completions/mean_length": 598.904052734375, "completions/mean_terminated_length": 567.398681640625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.018853286200117832, "grad_norm": 0.14078229665756226, "learning_rate": 2e-07, "loss": 0.0103, "num_tokens": 128445918.0, "reward": 0.5569196939468384, "reward_std": 0.2588878273963928, "rewards/simpleverify_reward/mean": 0.5569196343421936, "rewards/simpleverify_reward/std": 0.49702703952789307, "step": 202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3398.0, "completions/mean_length": 618.2879638671875, "completions/mean_terminated_length": 571.0792236328125, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.018946619300118418, "grad_norm": 0.12417794018983841, "learning_rate": 2e-07, "loss": 0.0445, "num_tokens": 129092504.0, "reward": 0.4810267984867096, "reward_std": 0.24228255450725555, "rewards/simpleverify_reward/mean": 0.4810267984867096, "rewards/simpleverify_reward/std": 0.49991899728775024, "step": 203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3109.0, "completions/mean_length": 574.7154541015625, "completions/mean_terminated_length": 538.9864501953125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.019039952400119, "grad_norm": 0.12666264176368713, "learning_rate": 2e-07, "loss": 0.0216, "num_tokens": 129690337.0, "reward": 0.515625, "reward_std": 0.22755269706249237, "rewards/simpleverify_reward/mean": 0.515625, "rewards/simpleverify_reward/std": 0.5000349283218384, "step": 204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3946.0, "completions/mean_length": 586.466552734375, "completions/mean_terminated_length": 538.8258056640625, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 0.019133285500119585, "grad_norm": 0.12717123329639435, "learning_rate": 2e-07, "loss": 0.036, "num_tokens": 130307635.0, "reward": 0.5457589626312256, "reward_std": 0.2220298945903778, "rewards/simpleverify_reward/mean": 0.5457589030265808, "rewards/simpleverify_reward/std": 0.4981798231601715, "step": 205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3377.0, "completions/mean_length": 604.4408569335938, "completions/mean_terminated_length": 569.0134887695312, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.019226618600120166, "grad_norm": 0.12991714477539062, "learning_rate": 2e-07, "loss": 0.031, "num_tokens": 130936590.0, "reward": 0.6082589626312256, "reward_std": 0.23330657184123993, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.48841196298599243, "step": 206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3632.0, "completions/mean_length": 634.7109375, "completions/mean_terminated_length": 587.7251586914062, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.019319951700120748, "grad_norm": 0.1104765236377716, "learning_rate": 2e-07, "loss": 0.0154, "num_tokens": 131590131.0, "reward": 0.5078125, "reward_std": 0.1749083548784256, "rewards/simpleverify_reward/mean": 0.5078125, "rewards/simpleverify_reward/std": 0.5002182126045227, "step": 207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3383.0, "completions/mean_length": 642.0592041015625, "completions/mean_terminated_length": 571.2494506835938, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.019413284800121333, "grad_norm": 0.11308775842189789, "learning_rate": 2e-07, "loss": 0.0262, "num_tokens": 132261896.0, "reward": 0.5245535969734192, "reward_std": 0.18645039200782776, "rewards/simpleverify_reward/mean": 0.5245535969734192, "rewards/simpleverify_reward/std": 0.4996756613254547, "step": 208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3668.0, "completions/mean_length": 622.4810791015625, "completions/mean_terminated_length": 567.3458251953125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.019506617900121915, "grad_norm": 0.11230313777923584, "learning_rate": 2e-07, "loss": 0.0043, "num_tokens": 132903639.0, "reward": 0.5725446939468384, "reward_std": 0.17292124032974243, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2657.0, "completions/mean_length": 595.3783569335938, "completions/mean_terminated_length": 551.8677978515625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.0195999510001225, "grad_norm": 0.12127574533224106, "learning_rate": 2e-07, "loss": 0.0157, "num_tokens": 133518434.0, "reward": 0.5078125, "reward_std": 0.20354601740837097, "rewards/simpleverify_reward/mean": 0.5078125, "rewards/simpleverify_reward/std": 0.5002182126045227, "step": 210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2177.0, "completions/mean_length": 586.8928833007812, "completions/mean_terminated_length": 547.2866821289062, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.019693284100123082, "grad_norm": 0.11536861956119537, "learning_rate": 2e-07, "loss": 0.0164, "num_tokens": 134130034.0, "reward": 0.5457589626312256, "reward_std": 0.195388525724411, "rewards/simpleverify_reward/mean": 0.5457589030265808, "rewards/simpleverify_reward/std": 0.4981797933578491, "step": 211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3172.0, "completions/mean_length": 596.5223388671875, "completions/mean_terminated_length": 561.0146484375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.019786617200123667, "grad_norm": 0.12042361497879028, "learning_rate": 2e-07, "loss": 0.0256, "num_tokens": 134748774.0, "reward": 0.5915178656578064, "reward_std": 0.20580121874809265, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3485.0, "completions/mean_length": 620.4888916015625, "completions/mean_terminated_length": 573.3099975585938, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.01987995030012425, "grad_norm": 0.11829041689634323, "learning_rate": 2e-07, "loss": 0.0363, "num_tokens": 135400052.0, "reward": 0.5412946939468384, "reward_std": 0.20043204724788666, "rewards/simpleverify_reward/mean": 0.5412946343421936, "rewards/simpleverify_reward/std": 0.49857014417648315, "step": 213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2914.0, "completions/mean_length": 620.2645263671875, "completions/mean_terminated_length": 573.0825805664062, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.019973283400124834, "grad_norm": 0.13445612788200378, "learning_rate": 2e-07, "loss": 0.0141, "num_tokens": 136044873.0, "reward": 0.4877232313156128, "reward_std": 0.23732459545135498, "rewards/simpleverify_reward/mean": 0.4877232015132904, "rewards/simpleverify_reward/std": 0.500128448009491, "step": 214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0033482142857143016, "completions/max_length": 4096.0, "completions/max_terminated_length": 2578.0, "completions/mean_length": 576.2645263671875, "completions/mean_terminated_length": 564.4401245117188, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.020066616500125416, "grad_norm": 0.11851615458726883, "learning_rate": 2e-07, "loss": 0.025, "num_tokens": 136639214.0, "reward": 0.5859375, "reward_std": 0.20929311215877533, "rewards/simpleverify_reward/mean": 0.5859375, "rewards/simpleverify_reward/std": 0.4928344786167145, "step": 215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3803.0, "completions/mean_length": 635.2053833007812, "completions/mean_terminated_length": 592.1898193359375, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.020159949600126, "grad_norm": 0.1224694475531578, "learning_rate": 2e-07, "loss": 0.0217, "num_tokens": 137296950.0, "reward": 0.520089328289032, "reward_std": 0.19415251910686493, "rewards/simpleverify_reward/mean": 0.5200892686843872, "rewards/simpleverify_reward/std": 0.4998753070831299, "step": 216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3838.0, "completions/mean_length": 639.443115234375, "completions/mean_terminated_length": 600.4300537109375, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.020253282700126583, "grad_norm": 0.11229932308197021, "learning_rate": 2e-07, "loss": 0.001, "num_tokens": 137964243.0, "reward": 0.5189732313156128, "reward_std": 0.19005078077316284, "rewards/simpleverify_reward/mean": 0.5189732313156128, "rewards/simpleverify_reward/std": 0.49991893768310547, "step": 217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2492.0, "completions/mean_length": 567.2254638671875, "completions/mean_terminated_length": 515.27294921875, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.020346615800127168, "grad_norm": 0.11987549066543579, "learning_rate": 2e-07, "loss": 0.0181, "num_tokens": 138556349.0, "reward": 0.6171875, "reward_std": 0.2000511735677719, "rewards/simpleverify_reward/mean": 0.6171875, "rewards/simpleverify_reward/std": 0.4863446056842804, "step": 218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3968.0, "completions/mean_length": 614.2857666015625, "completions/mean_terminated_length": 555.0056762695312, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.02043994890012775, "grad_norm": 0.11533152312040329, "learning_rate": 2e-07, "loss": 0.0309, "num_tokens": 139194957.0, "reward": 0.5647321939468384, "reward_std": 0.2083933800458908, "rewards/simpleverify_reward/mean": 0.5647321343421936, "rewards/simpleverify_reward/std": 0.49606895446777344, "step": 219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3223.0, "completions/mean_length": 552.521240234375, "completions/mean_terminated_length": 528.632568359375, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.02053328200012833, "grad_norm": 0.1412806659936905, "learning_rate": 2e-07, "loss": 0.0315, "num_tokens": 139789008.0, "reward": 0.5926339626312256, "reward_std": 0.25423547625541687, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3793.0, "completions/mean_length": 633.4967041015625, "completions/mean_terminated_length": 562.5114135742188, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.020626615100128916, "grad_norm": 0.11193571984767914, "learning_rate": 2e-07, "loss": 0.0346, "num_tokens": 140449453.0, "reward": 0.598214328289032, "reward_std": 0.20632296800613403, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053287506103516, "step": 221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2832.0, "completions/mean_length": 621.0022583007812, "completions/mean_terminated_length": 569.8414306640625, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.020719948200129498, "grad_norm": 0.11859031021595001, "learning_rate": 2e-07, "loss": 0.0346, "num_tokens": 141095007.0, "reward": 0.543526828289032, "reward_std": 0.19738341867923737, "rewards/simpleverify_reward/mean": 0.5435267686843872, "rewards/simpleverify_reward/std": 0.49838000535964966, "step": 222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2309.0, "completions/mean_length": 551.7277221679688, "completions/mean_terminated_length": 511.724609375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.020813281300130083, "grad_norm": 0.13303828239440918, "learning_rate": 2e-07, "loss": 0.0345, "num_tokens": 141673499.0, "reward": 0.621651828289032, "reward_std": 0.24630440771579742, "rewards/simpleverify_reward/mean": 0.6216517686843872, "rewards/simpleverify_reward/std": 0.4852459728717804, "step": 223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2291.0, "completions/mean_length": 576.8995971679688, "completions/mean_terminated_length": 553.17529296875, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.020906614400130665, "grad_norm": 0.13367369771003723, "learning_rate": 2e-07, "loss": 0.011, "num_tokens": 142278465.0, "reward": 0.5502232313156128, "reward_std": 0.22913390398025513, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3825.0, "completions/mean_length": 639.7120971679688, "completions/mean_terminated_length": 572.8668823242188, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.02099994750013125, "grad_norm": 0.11145082116127014, "learning_rate": 2e-07, "loss": 0.0237, "num_tokens": 142932047.0, "reward": 0.582589328289032, "reward_std": 0.19825245440006256, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.4934072494506836, "step": 225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4067.0, "completions/mean_length": 662.8638916015625, "completions/mean_terminated_length": 592.4806518554688, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.021093280600131832, "grad_norm": 0.11113393306732178, "learning_rate": 2e-07, "loss": 0.0355, "num_tokens": 143621229.0, "reward": 0.5178571939468384, "reward_std": 0.20703653991222382, "rewards/simpleverify_reward/mean": 0.5178571343421936, "rewards/simpleverify_reward/std": 0.4999600946903229, "step": 226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3820.0, "completions/mean_length": 574.5045166015625, "completions/mean_terminated_length": 546.7761840820312, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.021186613700132417, "grad_norm": 0.12723323702812195, "learning_rate": 2e-07, "loss": -0.0004, "num_tokens": 144232545.0, "reward": 0.5334821939468384, "reward_std": 0.19982405006885529, "rewards/simpleverify_reward/mean": 0.5334821343421936, "rewards/simpleverify_reward/std": 0.49915632605552673, "step": 227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3005.0, "completions/mean_length": 661.1629638671875, "completions/mean_terminated_length": 614.5362548828125, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.021279946800133, "grad_norm": 0.12399915605783463, "learning_rate": 2e-07, "loss": 0.0129, "num_tokens": 144918235.0, "reward": 0.5368303656578064, "reward_std": 0.25273409485816956, "rewards/simpleverify_reward/mean": 0.5368303656578064, "rewards/simpleverify_reward/std": 0.49892017245292664, "step": 228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3618.0, "completions/mean_length": 626.7879638671875, "completions/mean_terminated_length": 595.5338134765625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.021373279900133584, "grad_norm": 0.10704998672008514, "learning_rate": 2e-07, "loss": 0.0284, "num_tokens": 145562861.0, "reward": 0.5401785969734192, "reward_std": 0.20430122315883636, "rewards/simpleverify_reward/mean": 0.5401785969734192, "rewards/simpleverify_reward/std": 0.49866142868995667, "step": 229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3376.0, "completions/mean_length": 581.029052734375, "completions/mean_terminated_length": 553.3521118164062, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.021466613000134166, "grad_norm": 0.13537251949310303, "learning_rate": 2e-07, "loss": 0.023, "num_tokens": 146166151.0, "reward": 0.6127232313156128, "reward_std": 0.22345955669879913, "rewards/simpleverify_reward/mean": 0.6127232313156128, "rewards/simpleverify_reward/std": 0.4873998463153839, "step": 230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3784.0, "completions/mean_length": 623.8627319335938, "completions/mean_terminated_length": 560.73291015625, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.02155994610013475, "grad_norm": 0.12304084002971649, "learning_rate": 2e-07, "loss": 0.0606, "num_tokens": 146815692.0, "reward": 0.543526828289032, "reward_std": 0.20534543693065643, "rewards/simpleverify_reward/mean": 0.5435267686843872, "rewards/simpleverify_reward/std": 0.49838000535964966, "step": 231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2488.0, "completions/mean_length": 623.671875, "completions/mean_terminated_length": 572.5503540039062, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.021653279200135333, "grad_norm": 0.13233868777751923, "learning_rate": 2e-07, "loss": 0.044, "num_tokens": 147462262.0, "reward": 0.5301339626312256, "reward_std": 0.26378050446510315, "rewards/simpleverify_reward/mean": 0.5301339030265808, "rewards/simpleverify_reward/std": 0.49936985969543457, "step": 232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3491.0, "completions/mean_length": 589.4364013671875, "completions/mean_terminated_length": 553.8568115234375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.021746612300135915, "grad_norm": 0.11948335915803909, "learning_rate": 2e-07, "loss": 0.0221, "num_tokens": 148084213.0, "reward": 0.5736607313156128, "reward_std": 0.18979185819625854, "rewards/simpleverify_reward/mean": 0.5736607313156128, "rewards/simpleverify_reward/std": 0.4948205351829529, "step": 233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3672.0, "completions/mean_length": 614.3783569335938, "completions/mean_terminated_length": 559.114501953125, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.0218399454001365, "grad_norm": 0.11163806915283203, "learning_rate": 2e-07, "loss": 0.0317, "num_tokens": 148715840.0, "reward": 0.5412946939468384, "reward_std": 0.18220779299736023, "rewards/simpleverify_reward/mean": 0.5412946343421936, "rewards/simpleverify_reward/std": 0.49857014417648315, "step": 234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2704.0, "completions/mean_length": 580.5636596679688, "completions/mean_terminated_length": 528.8074340820312, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.02193327850013708, "grad_norm": 0.11213609576225281, "learning_rate": 2e-07, "loss": 0.0093, "num_tokens": 149328697.0, "reward": 0.5691964626312256, "reward_std": 0.19738270342350006, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652488231659, "step": 235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2446.0, "completions/mean_length": 638.4185791015625, "completions/mean_terminated_length": 575.5534057617188, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.022026611600137667, "grad_norm": 0.11981825530529022, "learning_rate": 2e-07, "loss": 0.0342, "num_tokens": 149985744.0, "reward": 0.5401785969734192, "reward_std": 0.20121413469314575, "rewards/simpleverify_reward/mean": 0.5401785969734192, "rewards/simpleverify_reward/std": 0.49866142868995667, "step": 236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2060.0, "completions/mean_length": 591.3449096679688, "completions/mean_terminated_length": 555.78466796875, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.02211994470013825, "grad_norm": 0.112509585916996, "learning_rate": 2e-07, "loss": 0.0085, "num_tokens": 150601789.0, "reward": 0.582589328289032, "reward_std": 0.19441214203834534, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.493407279253006, "step": 237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3642.0, "completions/mean_length": 558.8069458007812, "completions/mean_terminated_length": 518.8837890625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.022213277800138834, "grad_norm": 0.12562325596809387, "learning_rate": 2e-07, "loss": 0.0051, "num_tokens": 151189248.0, "reward": 0.6116071939468384, "reward_std": 0.2168871909379959, "rewards/simpleverify_reward/mean": 0.6116071343421936, "rewards/simpleverify_reward/std": 0.4876568913459778, "step": 238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3615.0, "completions/mean_length": 563.1529541015625, "completions/mean_terminated_length": 515.1957397460938, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.022306610900139415, "grad_norm": 0.1343999207019806, "learning_rate": 2e-07, "loss": 0.0353, "num_tokens": 151772225.0, "reward": 0.598214328289032, "reward_std": 0.2375834882259369, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053290486335754, "step": 239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3621.0, "completions/mean_length": 632.1495971679688, "completions/mean_terminated_length": 573.1737060546875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.02239994400014, "grad_norm": 0.12639294564723969, "learning_rate": 2e-07, "loss": 0.0372, "num_tokens": 152428967.0, "reward": 0.5491071939468384, "reward_std": 0.217755526304245, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2330.0, "completions/mean_length": 564.3951416015625, "completions/mean_terminated_length": 504.265625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.022493277100140582, "grad_norm": 0.12059276551008224, "learning_rate": 2e-07, "loss": 0.0103, "num_tokens": 153017529.0, "reward": 0.590401828289032, "reward_std": 0.18479111790657043, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3019.0, "completions/mean_length": 605.0, "completions/mean_terminated_length": 557.6109008789062, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.022586610200141168, "grad_norm": 0.11692879348993301, "learning_rate": 2e-07, "loss": 0.0291, "num_tokens": 153653393.0, "reward": 0.6026785969734192, "reward_std": 0.2151840180158615, "rewards/simpleverify_reward/mean": 0.6026785969734192, "rewards/simpleverify_reward/std": 0.48961687088012695, "step": 242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3868.0, "completions/mean_length": 573.5178833007812, "completions/mean_terminated_length": 537.7767333984375, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.02267994330014175, "grad_norm": 0.1351606547832489, "learning_rate": 2e-07, "loss": 0.0321, "num_tokens": 154244721.0, "reward": 0.5959821939468384, "reward_std": 0.2149689942598343, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 641.3370971679688, "completions/mean_terminated_length": 578.5249633789062, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.022773276400142334, "grad_norm": 0.11484739929437637, "learning_rate": 2e-07, "loss": 0.0222, "num_tokens": 154906039.0, "reward": 0.5390625, "reward_std": 0.19622112810611725, "rewards/simpleverify_reward/mean": 0.5390625, "rewards/simpleverify_reward/std": 0.4987502098083496, "step": 244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4016.0, "completions/mean_length": 694.7891235351562, "completions/mean_terminated_length": 581.0230712890625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.022866609500142916, "grad_norm": 0.11905704438686371, "learning_rate": 2e-07, "loss": 0.0643, "num_tokens": 155629154.0, "reward": 0.4698660969734192, "reward_std": 0.23500338196754456, "rewards/simpleverify_reward/mean": 0.4698660671710968, "rewards/simpleverify_reward/std": 0.49936988949775696, "step": 245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2485.0, "completions/mean_length": 635.3125, "completions/mean_terminated_length": 560.3375244140625, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.022959942600143498, "grad_norm": 0.10711460560560226, "learning_rate": 2e-07, "loss": 0.0327, "num_tokens": 156285042.0, "reward": 0.5345982313156128, "reward_std": 0.1765337437391281, "rewards/simpleverify_reward/mean": 0.5345982313156128, "rewards/simpleverify_reward/std": 0.4990801215171814, "step": 246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2138.0, "completions/mean_length": 651.015625, "completions/mean_terminated_length": 604.2511596679688, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.023053275700144083, "grad_norm": 0.11132089793682098, "learning_rate": 2e-07, "loss": 0.033, "num_tokens": 156957280.0, "reward": 0.5033482313156128, "reward_std": 0.2098148614168167, "rewards/simpleverify_reward/mean": 0.5033482313156128, "rewards/simpleverify_reward/std": 0.5002680420875549, "step": 247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2332.0, "completions/mean_length": 578.154052734375, "completions/mean_terminated_length": 522.315185546875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.023146608800144665, "grad_norm": 0.12913449108600616, "learning_rate": 2e-07, "loss": 0.0228, "num_tokens": 157562434.0, "reward": 0.5524553656578064, "reward_std": 0.2272142916917801, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3622.0, "completions/mean_length": 615.7433471679688, "completions/mean_terminated_length": 552.4658813476562, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.02323994190014525, "grad_norm": 0.12833014130592346, "learning_rate": 2e-07, "loss": 0.0435, "num_tokens": 158202284.0, "reward": 0.5189732313156128, "reward_std": 0.24086037278175354, "rewards/simpleverify_reward/mean": 0.5189732313156128, "rewards/simpleverify_reward/std": 0.49991893768310547, "step": 249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4051.0, "completions/mean_length": 639.46875, "completions/mean_terminated_length": 576.6226806640625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.023333275000145832, "grad_norm": 0.10520416498184204, "learning_rate": 2e-07, "loss": 0.0311, "num_tokens": 158862352.0, "reward": 0.535714328289032, "reward_std": 0.185732901096344, "rewards/simpleverify_reward/mean": 0.5357142686843872, "rewards/simpleverify_reward/std": 0.4990014135837555, "step": 250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 1745.0, "completions/mean_length": 597.3203125, "completions/mean_terminated_length": 529.6552734375, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.023426608100146417, "grad_norm": 0.12229571491479874, "learning_rate": 2e-07, "loss": 0.0401, "num_tokens": 159484831.0, "reward": 0.5647321939468384, "reward_std": 0.21553030610084534, "rewards/simpleverify_reward/mean": 0.5647321343421936, "rewards/simpleverify_reward/std": 0.49606895446777344, "step": 251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3988.0, "completions/mean_length": 646.2890625, "completions/mean_terminated_length": 591.53173828125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.023519941200147, "grad_norm": 0.11330760270357132, "learning_rate": 2e-07, "loss": 0.0316, "num_tokens": 160147818.0, "reward": 0.5245535969734192, "reward_std": 0.23149645328521729, "rewards/simpleverify_reward/mean": 0.5245535969734192, "rewards/simpleverify_reward/std": 0.4996756613254547, "step": 252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3873.0, "completions/mean_length": 601.78125, "completions/mean_terminated_length": 562.3431396484375, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.023613274300147584, "grad_norm": 0.1277487874031067, "learning_rate": 2e-07, "loss": 0.0405, "num_tokens": 160776174.0, "reward": 0.5881696939468384, "reward_std": 0.21245165169239044, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924395978450775, "step": 253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3853.0, "completions/mean_length": 614.4699096679688, "completions/mean_terminated_length": 559.20751953125, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.023706607400148166, "grad_norm": 0.11618292331695557, "learning_rate": 2e-07, "loss": 0.0011, "num_tokens": 161414883.0, "reward": 0.5870535969734192, "reward_std": 0.18013693392276764, "rewards/simpleverify_reward/mean": 0.5870535969734192, "rewards/simpleverify_reward/std": 0.49263837933540344, "step": 254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3779.0, "completions/mean_length": 644.921875, "completions/mean_terminated_length": 566.130126953125, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.02379994050014875, "grad_norm": 0.12149063497781754, "learning_rate": 2e-07, "loss": 0.0243, "num_tokens": 162094149.0, "reward": 0.5345982313156128, "reward_std": 0.2110626995563507, "rewards/simpleverify_reward/mean": 0.5345982313156128, "rewards/simpleverify_reward/std": 0.4990801215171814, "step": 255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2414.0, "completions/mean_length": 644.1730346679688, "completions/mean_terminated_length": 573.4066162109375, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.023893273600149333, "grad_norm": 0.12446026504039764, "learning_rate": 2e-07, "loss": 0.0253, "num_tokens": 162763232.0, "reward": 0.5379464626312256, "reward_std": 0.22225703299045563, "rewards/simpleverify_reward/mean": 0.5379464030265808, "rewards/simpleverify_reward/std": 0.4988364577293396, "step": 256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3427.0, "completions/mean_length": 619.2957763671875, "completions/mean_terminated_length": 576.08251953125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.023986606700149918, "grad_norm": 0.10498727113008499, "learning_rate": 2e-07, "loss": 0.0193, "num_tokens": 163405585.0, "reward": 0.5446428656578064, "reward_std": 0.192794531583786, "rewards/simpleverify_reward/mean": 0.5446428656578064, "rewards/simpleverify_reward/std": 0.4982811510562897, "step": 257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3428.0, "completions/mean_length": 639.265625, "completions/mean_terminated_length": 560.3447265625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.0240799398001505, "grad_norm": 0.12136202305555344, "learning_rate": 2e-07, "loss": 0.0464, "num_tokens": 164072807.0, "reward": 0.4966517984867096, "reward_std": 0.22901737689971924, "rewards/simpleverify_reward/mean": 0.4966517984867096, "rewards/simpleverify_reward/std": 0.5002680420875549, "step": 258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2801.0, "completions/mean_length": 590.8292846679688, "completions/mean_terminated_length": 502.598388671875, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.02417327290015108, "grad_norm": 0.12219705432653427, "learning_rate": 2e-07, "loss": 0.044, "num_tokens": 164686390.0, "reward": 0.5613839626312256, "reward_std": 0.1890750676393509, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3763.0, "completions/mean_length": 634.4732666015625, "completions/mean_terminated_length": 591.4486083984375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.024266606000151666, "grad_norm": 0.11939338594675064, "learning_rate": 2e-07, "loss": 0.0149, "num_tokens": 165341574.0, "reward": 0.543526828289032, "reward_std": 0.18490734696388245, "rewards/simpleverify_reward/mean": 0.5435267686843872, "rewards/simpleverify_reward/std": 0.49838000535964966, "step": 260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3808.0, "completions/mean_length": 605.7366333007812, "completions/mean_terminated_length": 558.3574829101562, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.024359939100152248, "grad_norm": 0.126478374004364, "learning_rate": 2e-07, "loss": 0.0227, "num_tokens": 165971522.0, "reward": 0.5725446939468384, "reward_std": 0.19700755178928375, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3818.0, "completions/mean_length": 616.0569458007812, "completions/mean_terminated_length": 560.8197631835938, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.024453272200152833, "grad_norm": 0.12602128088474274, "learning_rate": 2e-07, "loss": 0.0306, "num_tokens": 166608845.0, "reward": 0.5256696939468384, "reward_std": 0.2258574366569519, "rewards/simpleverify_reward/mean": 0.5256696343421936, "rewards/simpleverify_reward/std": 0.4996195435523987, "step": 262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3280.0, "completions/mean_length": 597.4832763671875, "completions/mean_terminated_length": 577.8507690429688, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.024546605300153415, "grad_norm": 0.12643718719482422, "learning_rate": 2e-07, "loss": 0.0218, "num_tokens": 167232134.0, "reward": 0.5837053656578064, "reward_std": 0.22067444026470184, "rewards/simpleverify_reward/mean": 0.5837053656578064, "rewards/simpleverify_reward/std": 0.49321892857551575, "step": 263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3868.0, "completions/mean_length": 592.2511596679688, "completions/mean_terminated_length": 540.6670532226562, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.024639938400154, "grad_norm": 0.11290130764245987, "learning_rate": 2e-07, "loss": 0.018, "num_tokens": 167845279.0, "reward": 0.6116071939468384, "reward_std": 0.18121999502182007, "rewards/simpleverify_reward/mean": 0.6116071343421936, "rewards/simpleverify_reward/std": 0.4876568913459778, "step": 264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3639.0, "completions/mean_length": 598.9866333007812, "completions/mean_terminated_length": 519.1461181640625, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.024733271500154582, "grad_norm": 0.12652729451656342, "learning_rate": 2e-07, "loss": 0.0265, "num_tokens": 168467691.0, "reward": 0.5524553656578064, "reward_std": 0.21027132868766785, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3336.0, "completions/mean_length": 597.779052734375, "completions/mean_terminated_length": 558.2957153320312, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.024826604600155167, "grad_norm": 0.12412650138139725, "learning_rate": 2e-07, "loss": 0.0238, "num_tokens": 169091061.0, "reward": 0.53125, "reward_std": 0.22236579656600952, "rewards/simpleverify_reward/mean": 0.53125, "rewards/simpleverify_reward/std": 0.4993011951446533, "step": 266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2258.0, "completions/mean_length": 616.8326416015625, "completions/mean_terminated_length": 561.6077270507812, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.02491993770015575, "grad_norm": 0.12642021477222443, "learning_rate": 2e-07, "loss": 0.007, "num_tokens": 169734575.0, "reward": 0.5223214626312256, "reward_std": 0.21601775288581848, "rewards/simpleverify_reward/mean": 0.5223214030265808, "rewards/simpleverify_reward/std": 0.49978047609329224, "step": 267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4038.0, "completions/mean_length": 633.0067138671875, "completions/mean_terminated_length": 574.04541015625, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.025013270800156334, "grad_norm": 0.11760075390338898, "learning_rate": 2e-07, "loss": 0.0429, "num_tokens": 170386309.0, "reward": 0.535714328289032, "reward_std": 0.2242058962583542, "rewards/simpleverify_reward/mean": 0.5357142686843872, "rewards/simpleverify_reward/std": 0.4990014135837555, "step": 268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3926.0, "completions/mean_length": 588.5971069335938, "completions/mean_terminated_length": 540.9852905273438, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 0.025106603900156916, "grad_norm": 0.11077727377414703, "learning_rate": 2e-07, "loss": 0.0162, "num_tokens": 171004628.0, "reward": 0.5758928656578064, "reward_std": 0.1906997561454773, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49448272585868835, "step": 269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3076.0, "completions/mean_length": 595.5201416015625, "completions/mean_terminated_length": 571.9213256835938, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.0251999370001575, "grad_norm": 0.12699738144874573, "learning_rate": 2e-07, "loss": 0.0196, "num_tokens": 171625726.0, "reward": 0.5770089626312256, "reward_std": 0.22473089396953583, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099617958069, "step": 270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4038.0, "completions/mean_length": 651.3214721679688, "completions/mean_terminated_length": 544.2946166992188, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 0.025293270100158083, "grad_norm": 0.12280620634555817, "learning_rate": 2e-07, "loss": 0.0326, "num_tokens": 172296446.0, "reward": 0.5390625, "reward_std": 0.22931934893131256, "rewards/simpleverify_reward/mean": 0.5390625, "rewards/simpleverify_reward/std": 0.4987502098083496, "step": 271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4052.0, "completions/mean_length": 607.6350708007812, "completions/mean_terminated_length": 544.210205078125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.025386603200158665, "grad_norm": 0.12548625469207764, "learning_rate": 2e-07, "loss": 0.0441, "num_tokens": 172925159.0, "reward": 0.59375, "reward_std": 0.19825245440006256, "rewards/simpleverify_reward/mean": 0.59375, "rewards/simpleverify_reward/std": 0.4914066195487976, "step": 272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3997.0, "completions/mean_length": 652.6964721679688, "completions/mean_terminated_length": 598.0408325195312, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.02547993630015925, "grad_norm": 0.11672059446573257, "learning_rate": 2e-07, "loss": 0.0132, "num_tokens": 173596703.0, "reward": 0.5133928656578064, "reward_std": 0.20200437307357788, "rewards/simpleverify_reward/mean": 0.5133928656578064, "rewards/simpleverify_reward/std": 0.500099778175354, "step": 273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 672.078125, "completions/mean_terminated_length": 597.899658203125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.02557326940015983, "grad_norm": 0.12118873745203018, "learning_rate": 2e-07, "loss": 0.0384, "num_tokens": 174296029.0, "reward": 0.5022321939468384, "reward_std": 0.23622803390026093, "rewards/simpleverify_reward/mean": 0.5022321343421936, "rewards/simpleverify_reward/std": 0.5002743005752563, "step": 274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4032.0, "completions/mean_length": 611.5, "completions/mean_terminated_length": 584.06298828125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.025666602500160417, "grad_norm": 0.11909826099872589, "learning_rate": 2e-07, "loss": 0.0208, "num_tokens": 174932957.0, "reward": 0.5680803656578064, "reward_std": 0.20951178669929504, "rewards/simpleverify_reward/mean": 0.5680803656578064, "rewards/simpleverify_reward/std": 0.4956200122833252, "step": 275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3322.0, "completions/mean_length": 599.7076416015625, "completions/mean_terminated_length": 519.883544921875, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 0.025759935600161, "grad_norm": 0.13238736987113953, "learning_rate": 2e-07, "loss": 0.0354, "num_tokens": 175553887.0, "reward": 0.5535714626312256, "reward_std": 0.22469699382781982, "rewards/simpleverify_reward/mean": 0.5535714030265808, "rewards/simpleverify_reward/std": 0.4973994791507721, "step": 276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3100.0, "completions/mean_length": 625.3616333007812, "completions/mean_terminated_length": 554.2095947265625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.025853268700161584, "grad_norm": 0.12622685730457306, "learning_rate": 2e-07, "loss": 0.0304, "num_tokens": 176202419.0, "reward": 0.5948660969734192, "reward_std": 0.16863587498664856, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.49119213223457336, "step": 277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3661.0, "completions/mean_length": 531.3035888671875, "completions/mean_terminated_length": 495.1341247558594, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.025946601800162165, "grad_norm": 0.13599437475204468, "learning_rate": 2e-07, "loss": 0.0236, "num_tokens": 176756059.0, "reward": 0.6149553656578064, "reward_std": 0.2130935937166214, "rewards/simpleverify_reward/mean": 0.6149553656578064, "rewards/simpleverify_reward/std": 0.4868776500225067, "step": 278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2136.0, "completions/mean_length": 601.2623291015625, "completions/mean_terminated_length": 553.8224487304688, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.02603993490016275, "grad_norm": 0.1265670508146286, "learning_rate": 2e-07, "loss": 0.025, "num_tokens": 177382302.0, "reward": 0.578125, "reward_std": 0.20843547582626343, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3516.0, "completions/mean_length": 625.6819458007812, "completions/mean_terminated_length": 582.5480346679688, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.026133268000163332, "grad_norm": 0.11913011968135834, "learning_rate": 2e-07, "loss": 0.0214, "num_tokens": 178030153.0, "reward": 0.5267857313156128, "reward_std": 0.22281834483146667, "rewards/simpleverify_reward/mean": 0.5267857313156128, "rewards/simpleverify_reward/std": 0.4995608329772949, "step": 280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2200.0, "completions/mean_length": 625.3717041015625, "completions/mean_terminated_length": 562.269287109375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.026226601100163918, "grad_norm": 0.11387820541858673, "learning_rate": 2e-07, "loss": 0.016, "num_tokens": 178681166.0, "reward": 0.5546875, "reward_std": 0.1944117248058319, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 614.4397583007812, "completions/mean_terminated_length": 575.1444702148438, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.0263199342001645, "grad_norm": 0.10716240853071213, "learning_rate": 2e-07, "loss": 0.0189, "num_tokens": 179321088.0, "reward": 0.520089328289032, "reward_std": 0.1986924707889557, "rewards/simpleverify_reward/mean": 0.5200892686843872, "rewards/simpleverify_reward/std": 0.4998753070831299, "step": 282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3225.0, "completions/mean_length": 659.8482666015625, "completions/mean_terminated_length": 581.397216796875, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.026413267300165084, "grad_norm": 0.11088128387928009, "learning_rate": 2e-07, "loss": 0.0544, "num_tokens": 180007696.0, "reward": 0.5580357313156128, "reward_std": 0.2047053426504135, "rewards/simpleverify_reward/mean": 0.5580357313156128, "rewards/simpleverify_reward/std": 0.49689778685569763, "step": 283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4079.0, "completions/mean_length": 627.03125, "completions/mean_terminated_length": 587.8781127929688, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.026506600400165666, "grad_norm": 0.11821258068084717, "learning_rate": 2e-07, "loss": 0.011, "num_tokens": 180661972.0, "reward": 0.582589328289032, "reward_std": 0.2044457048177719, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.4934072494506836, "step": 284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3829.0, "completions/mean_length": 611.8917846679688, "completions/mean_terminated_length": 560.5968017578125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.026599933500166248, "grad_norm": 0.12807463109493256, "learning_rate": 2e-07, "loss": 0.0106, "num_tokens": 181294531.0, "reward": 0.551339328289032, "reward_std": 0.22469696402549744, "rewards/simpleverify_reward/mean": 0.5513392686843872, "rewards/simpleverify_reward/std": 0.4976350665092468, "step": 285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2241.0, "completions/mean_length": 549.6060791015625, "completions/mean_terminated_length": 509.5790100097656, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.026693266600166833, "grad_norm": 0.12148451060056686, "learning_rate": 2e-07, "loss": 0.0233, "num_tokens": 181876826.0, "reward": 0.5892857313156128, "reward_std": 0.19276133179664612, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3390.0, "completions/mean_length": 605.6116333007812, "completions/mean_terminated_length": 570.1961669921875, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.026786599700167415, "grad_norm": 0.11443469673395157, "learning_rate": 2e-07, "loss": 0.0368, "num_tokens": 182510390.0, "reward": 0.4988839626312256, "reward_std": 0.1936967670917511, "rewards/simpleverify_reward/mean": 0.4988839328289032, "rewards/simpleverify_reward/std": 0.5002779960632324, "step": 287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2509.0, "completions/mean_length": 598.5748291015625, "completions/mean_terminated_length": 559.1004638671875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.026879932800168, "grad_norm": 0.13432221114635468, "learning_rate": 2e-07, "loss": 0.0155, "num_tokens": 183137521.0, "reward": 0.5691964626312256, "reward_std": 0.22785551846027374, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652488231659, "step": 288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3553.0, "completions/mean_length": 588.7221069335938, "completions/mean_terminated_length": 529.0068359375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.026973265900168582, "grad_norm": 0.1207047551870346, "learning_rate": 2e-07, "loss": 0.0529, "num_tokens": 183748224.0, "reward": 0.598214328289032, "reward_std": 0.1983586847782135, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053290486335754, "step": 289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3627.0, "completions/mean_length": 651.3538208007812, "completions/mean_terminated_length": 608.5390014648438, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.027066599000169167, "grad_norm": 0.10818062722682953, "learning_rate": 2e-07, "loss": 0.03, "num_tokens": 184424045.0, "reward": 0.4921875298023224, "reward_std": 0.19527865946292877, "rewards/simpleverify_reward/mean": 0.4921875, "rewards/simpleverify_reward/std": 0.5002182126045227, "step": 290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3751.0, "completions/mean_length": 595.4442138671875, "completions/mean_terminated_length": 547.9253540039062, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 0.02715993210016975, "grad_norm": 0.11716513335704803, "learning_rate": 2e-07, "loss": 0.0318, "num_tokens": 185050139.0, "reward": 0.5725446939468384, "reward_std": 0.2083185464143753, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4039.0, "completions/mean_length": 598.9933471679688, "completions/mean_terminated_length": 559.5237426757812, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.027253265200170334, "grad_norm": 0.12386538088321686, "learning_rate": 2e-07, "loss": 0.0171, "num_tokens": 185675637.0, "reward": 0.5647321939468384, "reward_std": 0.1936628520488739, "rewards/simpleverify_reward/mean": 0.5647321343421936, "rewards/simpleverify_reward/std": 0.49606892466545105, "step": 292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2923.0, "completions/mean_length": 656.2611694335938, "completions/mean_terminated_length": 589.7360229492188, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.027346598300170916, "grad_norm": 0.10925793647766113, "learning_rate": 2e-07, "loss": 0.0328, "num_tokens": 186342447.0, "reward": 0.5345982313156128, "reward_std": 0.19918397068977356, "rewards/simpleverify_reward/mean": 0.5345982313156128, "rewards/simpleverify_reward/std": 0.4990801215171814, "step": 293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3904.0, "completions/mean_length": 642.0223388671875, "completions/mean_terminated_length": 603.0383911132812, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.0274399314001715, "grad_norm": 0.12657123804092407, "learning_rate": 2e-07, "loss": 0.0167, "num_tokens": 187009323.0, "reward": 0.5089285969734192, "reward_std": 0.2288312166929245, "rewards/simpleverify_reward/mean": 0.5089285969734192, "rewards/simpleverify_reward/std": 0.5001994967460632, "step": 294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3757.0, "completions/mean_length": 630.2678833007812, "completions/mean_terminated_length": 591.1512451171875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.027533264500172083, "grad_norm": 0.1168990209698677, "learning_rate": 2e-07, "loss": 0.0255, "num_tokens": 187659739.0, "reward": 0.5345982313156128, "reward_std": 0.20857705175876617, "rewards/simpleverify_reward/mean": 0.5345982313156128, "rewards/simpleverify_reward/std": 0.4990801215171814, "step": 295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3752.0, "completions/mean_length": 612.575927734375, "completions/mean_terminated_length": 549.2409057617188, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 0.027626597600172668, "grad_norm": 0.10252423584461212, "learning_rate": 2e-07, "loss": 0.0086, "num_tokens": 188294335.0, "reward": 0.5145089626312256, "reward_std": 0.1569860726594925, "rewards/simpleverify_reward/mean": 0.5145089030265808, "rewards/simpleverify_reward/std": 0.5000685453414917, "step": 296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2299.0, "completions/mean_length": 583.9721069335938, "completions/mean_terminated_length": 516.0488891601562, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.02771993070017325, "grad_norm": 0.1200820729136467, "learning_rate": 2e-07, "loss": 0.004, "num_tokens": 188898702.0, "reward": 0.606026828289032, "reward_std": 0.20632115006446838, "rewards/simpleverify_reward/mean": 0.6060267686843872, "rewards/simpleverify_reward/std": 0.48890194296836853, "step": 297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2734.0, "completions/mean_length": 584.6004638671875, "completions/mean_terminated_length": 548.9718017578125, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.02781326380017383, "grad_norm": 0.12720705568790436, "learning_rate": 2e-07, "loss": 0.0169, "num_tokens": 189511856.0, "reward": 0.512276828289032, "reward_std": 0.19006218016147614, "rewards/simpleverify_reward/mean": 0.5122767686843872, "rewards/simpleverify_reward/std": 0.500128448009491, "step": 298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1931.0, "completions/mean_length": 569.8895263671875, "completions/mean_terminated_length": 538.1227416992188, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.027906596900174416, "grad_norm": 0.12598156929016113, "learning_rate": 2e-07, "loss": 0.0142, "num_tokens": 190111613.0, "reward": 0.6149553656578064, "reward_std": 0.21812321245670319, "rewards/simpleverify_reward/mean": 0.6149553656578064, "rewards/simpleverify_reward/std": 0.4868776500225067, "step": 299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3921.0, "completions/mean_length": 619.78125, "completions/mean_terminated_length": 568.6024780273438, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.027999930000174998, "grad_norm": 0.14453649520874023, "learning_rate": 2e-07, "loss": 0.0227, "num_tokens": 190752561.0, "reward": 0.5680803656578064, "reward_std": 0.24717383086681366, "rewards/simpleverify_reward/mean": 0.5680803656578064, "rewards/simpleverify_reward/std": 0.4956200420856476, "step": 300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3642.0, "completions/mean_length": 656.359375, "completions/mean_terminated_length": 597.7957153320312, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.028093263100175583, "grad_norm": 0.1199445053935051, "learning_rate": 2e-07, "loss": 0.0259, "num_tokens": 191422067.0, "reward": 0.551339328289032, "reward_std": 0.23149757087230682, "rewards/simpleverify_reward/mean": 0.5513392686843872, "rewards/simpleverify_reward/std": 0.4976350665092468, "step": 301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3994.0, "completions/mean_length": 654.9910888671875, "completions/mean_terminated_length": 588.44140625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.028186596200176165, "grad_norm": 0.11553218215703964, "learning_rate": 2e-07, "loss": 0.0295, "num_tokens": 192106771.0, "reward": 0.535714328289032, "reward_std": 0.1988915503025055, "rewards/simpleverify_reward/mean": 0.5357142686843872, "rewards/simpleverify_reward/std": 0.4990014135837555, "step": 302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2502.0, "completions/mean_length": 607.5145263671875, "completions/mean_terminated_length": 564.15478515625, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.02827992930017675, "grad_norm": 0.1192716658115387, "learning_rate": 2e-07, "loss": 0.0308, "num_tokens": 192744704.0, "reward": 0.5625, "reward_std": 0.20388302206993103, "rewards/simpleverify_reward/mean": 0.5625, "rewards/simpleverify_reward/std": 0.49635544419288635, "step": 303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3971.0, "completions/mean_length": 659.3292846679688, "completions/mean_terminated_length": 576.84912109375, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.028373262400177332, "grad_norm": 0.10585419833660126, "learning_rate": 2e-07, "loss": 0.0217, "num_tokens": 193420151.0, "reward": 0.590401828289032, "reward_std": 0.18378332257270813, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3521.0, "completions/mean_length": 626.5167846679688, "completions/mean_terminated_length": 567.4449462890625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.028466595500177917, "grad_norm": 0.11742942035198212, "learning_rate": 2e-07, "loss": 0.0189, "num_tokens": 194064718.0, "reward": 0.53125, "reward_std": 0.2103448063135147, "rewards/simpleverify_reward/mean": 0.53125, "rewards/simpleverify_reward/std": 0.4993011951446533, "step": 305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2315.0, "completions/mean_length": 560.5892944335938, "completions/mean_terminated_length": 532.7514038085938, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.0285599286001785, "grad_norm": 0.11773978173732758, "learning_rate": 2e-07, "loss": 0.0169, "num_tokens": 194656174.0, "reward": 0.5100446939468384, "reward_std": 0.20531447231769562, "rewards/simpleverify_reward/mean": 0.5100446343421936, "rewards/simpleverify_reward/std": 0.5001782774925232, "step": 306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2796.0, "completions/mean_length": 628.3404541015625, "completions/mean_terminated_length": 589.2020263671875, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.028653261700179084, "grad_norm": 0.12680616974830627, "learning_rate": 2e-07, "loss": 0.0158, "num_tokens": 195312103.0, "reward": 0.5580357313156128, "reward_std": 0.2079048454761505, "rewards/simpleverify_reward/mean": 0.5580357313156128, "rewards/simpleverify_reward/std": 0.49689781665802, "step": 307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2749.0, "completions/mean_length": 548.8035888671875, "completions/mean_terminated_length": 532.8969116210938, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.028746594800179666, "grad_norm": 0.1390857696533203, "learning_rate": 2e-07, "loss": 0.0023, "num_tokens": 195897415.0, "reward": 0.5892857313156128, "reward_std": 0.24657611548900604, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3165.0, "completions/mean_length": 594.6027221679688, "completions/mean_terminated_length": 551.08251953125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.02883992790018025, "grad_norm": 0.1139182597398758, "learning_rate": 2e-07, "loss": 0.0354, "num_tokens": 196523875.0, "reward": 0.5323660969734192, "reward_std": 0.18738332390785217, "rewards/simpleverify_reward/mean": 0.5323660969734192, "rewards/simpleverify_reward/std": 0.4992299973964691, "step": 309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2102.0, "completions/mean_length": 577.6439819335938, "completions/mean_terminated_length": 529.8834838867188, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.028933261000180833, "grad_norm": 0.12072154134511948, "learning_rate": 2e-07, "loss": 0.0304, "num_tokens": 197121428.0, "reward": 0.5892857313156128, "reward_std": 0.19873210787773132, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2449.0, "completions/mean_length": 654.1328125, "completions/mean_terminated_length": 567.4954223632812, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.029026594100181415, "grad_norm": 0.11374376714229584, "learning_rate": 2e-07, "loss": 0.0404, "num_tokens": 197791987.0, "reward": 0.5178571939468384, "reward_std": 0.20418612658977509, "rewards/simpleverify_reward/mean": 0.5178571343421936, "rewards/simpleverify_reward/std": 0.4999600946903229, "step": 311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3141.0, "completions/mean_length": 612.25, "completions/mean_terminated_length": 540.8291625976562, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.029119927200182, "grad_norm": 0.12334593385457993, "learning_rate": 2e-07, "loss": 0.0499, "num_tokens": 198426211.0, "reward": 0.59375, "reward_std": 0.19861942529678345, "rewards/simpleverify_reward/mean": 0.59375, "rewards/simpleverify_reward/std": 0.4914066195487976, "step": 312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3945.0, "completions/mean_length": 621.9989013671875, "completions/mean_terminated_length": 562.8502197265625, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.02921326030018258, "grad_norm": 0.10969259589910507, "learning_rate": 2e-07, "loss": 0.0278, "num_tokens": 199064226.0, "reward": 0.5881696939468384, "reward_std": 0.18313999474048615, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924395978450775, "step": 313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4051.0, "completions/mean_length": 593.060302734375, "completions/mean_terminated_length": 533.4188842773438, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.029306593400183167, "grad_norm": 0.12832622230052948, "learning_rate": 2e-07, "loss": 0.028, "num_tokens": 199682744.0, "reward": 0.5837053656578064, "reward_std": 0.22733193635940552, "rewards/simpleverify_reward/mean": 0.5837053656578064, "rewards/simpleverify_reward/std": 0.49321892857551575, "step": 314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3916.0, "completions/mean_length": 633.935302734375, "completions/mean_terminated_length": 574.9898071289062, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.02939992650018375, "grad_norm": 0.10042139142751694, "learning_rate": 2e-07, "loss": 0.0094, "num_tokens": 200333710.0, "reward": 0.5245535969734192, "reward_std": 0.17728260159492493, "rewards/simpleverify_reward/mean": 0.5245535969734192, "rewards/simpleverify_reward/std": 0.4996756613254547, "step": 315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3195.0, "completions/mean_length": 624.3035888671875, "completions/mean_terminated_length": 557.160400390625, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.029493259600184334, "grad_norm": 0.12920136749744415, "learning_rate": 2e-07, "loss": 0.0458, "num_tokens": 200983014.0, "reward": 0.5859375, "reward_std": 0.22823600471019745, "rewards/simpleverify_reward/mean": 0.5859375, "rewards/simpleverify_reward/std": 0.4928344786167145, "step": 316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3720.0, "completions/mean_length": 586.5279541015625, "completions/mean_terminated_length": 558.894287109375, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.029586592700184915, "grad_norm": 0.11971534043550491, "learning_rate": 2e-07, "loss": 0.0296, "num_tokens": 201590271.0, "reward": 0.6071428656578064, "reward_std": 0.18314211070537567, "rewards/simpleverify_reward/mean": 0.6071428656578064, "rewards/simpleverify_reward/std": 0.48865827918052673, "step": 317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2277.0, "completions/mean_length": 563.482177734375, "completions/mean_terminated_length": 539.6674194335938, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.0296799258001855, "grad_norm": 0.11608321219682693, "learning_rate": 2e-07, "loss": 0.0099, "num_tokens": 202177543.0, "reward": 0.5881696939468384, "reward_std": 0.18489736318588257, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924395978450775, "step": 318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3614.0, "completions/mean_length": 568.1741333007812, "completions/mean_terminated_length": 528.356689453125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.029773258900186082, "grad_norm": 0.13157042860984802, "learning_rate": 2e-07, "loss": 0.0063, "num_tokens": 202777179.0, "reward": 0.5926339626312256, "reward_std": 0.19881446659564972, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4001.0, "completions/mean_length": 651.6640625, "completions/mean_terminated_length": 573.0262451171875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.029866592000186667, "grad_norm": 0.12401121109724045, "learning_rate": 2e-07, "loss": 0.0213, "num_tokens": 203447030.0, "reward": 0.5022321939468384, "reward_std": 0.21466590464115143, "rewards/simpleverify_reward/mean": 0.5022321343421936, "rewards/simpleverify_reward/std": 0.5002742409706116, "step": 320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4014.0, "completions/mean_length": 589.3381958007812, "completions/mean_terminated_length": 557.7466430664062, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.02995992510018725, "grad_norm": 0.12737920880317688, "learning_rate": 2e-07, "loss": 0.0305, "num_tokens": 204062797.0, "reward": 0.590401828289032, "reward_std": 0.21684440970420837, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4004.0, "completions/mean_length": 650.3225708007812, "completions/mean_terminated_length": 559.54296875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.030053258200187834, "grad_norm": 0.12256307154893875, "learning_rate": 2e-07, "loss": 0.0342, "num_tokens": 204745134.0, "reward": 0.527901828289032, "reward_std": 0.20947857201099396, "rewards/simpleverify_reward/mean": 0.5279017686843872, "rewards/simpleverify_reward/std": 0.49949970841407776, "step": 322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3287.0, "completions/mean_length": 606.578125, "completions/mean_terminated_length": 543.1340942382812, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.030146591300188416, "grad_norm": 0.11216285824775696, "learning_rate": 2e-07, "loss": 0.006, "num_tokens": 205378148.0, "reward": 0.5524553656578064, "reward_std": 0.16548095643520355, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2910.0, "completions/mean_length": 579.4285888671875, "completions/mean_terminated_length": 547.7477416992188, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.030239924400188998, "grad_norm": 0.12758012115955353, "learning_rate": 2e-07, "loss": 0.0132, "num_tokens": 205987852.0, "reward": 0.590401828289032, "reward_std": 0.20061752200126648, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2308.0, "completions/mean_length": 615.8236694335938, "completions/mean_terminated_length": 568.5814819335938, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.030333257500189583, "grad_norm": 0.1201363131403923, "learning_rate": 2e-07, "loss": 0.0103, "num_tokens": 206627582.0, "reward": 0.566964328289032, "reward_std": 0.19895853102207184, "rewards/simpleverify_reward/mean": 0.5669642686843872, "rewards/simpleverify_reward/std": 0.49577224254608154, "step": 325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 622.7377319335938, "completions/mean_terminated_length": 555.5642700195312, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.030426590600190165, "grad_norm": 0.1212654560804367, "learning_rate": 2e-07, "loss": 0.0213, "num_tokens": 207271227.0, "reward": 0.5915178656578064, "reward_std": 0.19494162499904633, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2381.0, "completions/mean_length": 600.2701416015625, "completions/mean_terminated_length": 552.8167724609375, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.03051992370019075, "grad_norm": 0.12713059782981873, "learning_rate": 2e-07, "loss": 0.0234, "num_tokens": 207897301.0, "reward": 0.5758928656578064, "reward_std": 0.2230154573917389, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49448272585868835, "step": 327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 1940.0, "completions/mean_length": 595.1183471679688, "completions/mean_terminated_length": 531.4658813476562, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.030613256800191332, "grad_norm": 0.13184987008571625, "learning_rate": 2e-07, "loss": 0.019, "num_tokens": 208511423.0, "reward": 0.5691964626312256, "reward_std": 0.2288312315940857, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652488231659, "step": 328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2731.0, "completions/mean_length": 664.255615234375, "completions/mean_terminated_length": 573.8430786132812, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.030706589900191917, "grad_norm": 0.12144416570663452, "learning_rate": 2e-07, "loss": 0.0236, "num_tokens": 209197788.0, "reward": 0.5602678656578064, "reward_std": 0.22698859870433807, "rewards/simpleverify_reward/mean": 0.5602678656578064, "rewards/simpleverify_reward/std": 0.4966317415237427, "step": 329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2570.0, "completions/mean_length": 592.71875, "completions/mean_terminated_length": 565.1339111328125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.0307999230001925, "grad_norm": 0.11700914055109024, "learning_rate": 2e-07, "loss": 0.0312, "num_tokens": 209822248.0, "reward": 0.6305803656578064, "reward_std": 0.22251734137535095, "rewards/simpleverify_reward/mean": 0.6305803656578064, "rewards/simpleverify_reward/std": 0.4829172194004059, "step": 330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3476.0, "completions/mean_length": 590.6183471679688, "completions/mean_terminated_length": 534.9773559570312, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.030893256100193084, "grad_norm": 0.11191953718662262, "learning_rate": 2e-07, "loss": 0.0172, "num_tokens": 210437714.0, "reward": 0.5770089626312256, "reward_std": 0.20012785494327545, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099319934845, "step": 331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3260.0, "completions/mean_length": 621.302490234375, "completions/mean_terminated_length": 562.1419067382812, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.030986589200193666, "grad_norm": 0.11805511265993118, "learning_rate": 2e-07, "loss": 0.0398, "num_tokens": 211080641.0, "reward": 0.5636160969734192, "reward_std": 0.19997559487819672, "rewards/simpleverify_reward/mean": 0.5636160969734192, "rewards/simpleverify_reward/std": 0.49621346592903137, "step": 332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3862.0, "completions/mean_length": 632.8772583007812, "completions/mean_terminated_length": 569.9113159179688, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.03107992230019425, "grad_norm": 0.11533884704113007, "learning_rate": 2e-07, "loss": 0.0293, "num_tokens": 211743427.0, "reward": 0.5613839626312256, "reward_std": 0.19794611632823944, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4011.0, "completions/mean_length": 646.0089721679688, "completions/mean_terminated_length": 599.176513671875, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.031173255400194833, "grad_norm": 0.12304108589887619, "learning_rate": 2e-07, "loss": 0.0263, "num_tokens": 212408067.0, "reward": 0.5368303656578064, "reward_std": 0.22552113234996796, "rewards/simpleverify_reward/mean": 0.5368303656578064, "rewards/simpleverify_reward/std": 0.49892017245292664, "step": 334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3951.0, "completions/mean_length": 648.7467041015625, "completions/mean_terminated_length": 590.0534057617188, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.03126658850019542, "grad_norm": 0.11291728913784027, "learning_rate": 2e-07, "loss": 0.0379, "num_tokens": 213085152.0, "reward": 0.5424107313156128, "reward_std": 0.18757833540439606, "rewards/simpleverify_reward/mean": 0.5424107313156128, "rewards/simpleverify_reward/std": 0.4984763562679291, "step": 335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2461.0, "completions/mean_length": 639.927490234375, "completions/mean_terminated_length": 581.0840454101562, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 0.031359921600196, "grad_norm": 0.11926426738500595, "learning_rate": 2e-07, "loss": 0.0304, "num_tokens": 213746799.0, "reward": 0.543526828289032, "reward_std": 0.2364562451839447, "rewards/simpleverify_reward/mean": 0.5435267686843872, "rewards/simpleverify_reward/std": 0.49838003516197205, "step": 336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3005.0, "completions/mean_length": 630.1428833007812, "completions/mean_terminated_length": 563.1126098632812, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.03145325470019658, "grad_norm": 0.12297292053699493, "learning_rate": 2e-07, "loss": 0.0434, "num_tokens": 214399919.0, "reward": 0.5758928656578064, "reward_std": 0.20249433815479279, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49448272585868835, "step": 337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3714.0, "completions/mean_length": 619.9241333007812, "completions/mean_terminated_length": 556.7227172851562, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.031546587800197166, "grad_norm": 0.1212683916091919, "learning_rate": 2e-07, "loss": 0.0155, "num_tokens": 215052155.0, "reward": 0.5133928656578064, "reward_std": 0.20136497914791107, "rewards/simpleverify_reward/mean": 0.5133928656578064, "rewards/simpleverify_reward/std": 0.500099778175354, "step": 338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3924.0, "completions/mean_length": 591.0424194335938, "completions/mean_terminated_length": 527.31591796875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.03163992090019775, "grad_norm": 0.11627273261547089, "learning_rate": 2e-07, "loss": 0.011, "num_tokens": 215667681.0, "reward": 0.5926339626312256, "reward_std": 0.17217306792736053, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3023.0, "completions/mean_length": 588.5703125, "completions/mean_terminated_length": 540.9581909179688, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.03173325400019833, "grad_norm": 0.13151662051677704, "learning_rate": 2e-07, "loss": 0.0239, "num_tokens": 216291720.0, "reward": 0.5892857313156128, "reward_std": 0.22161510586738586, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3352.0, "completions/mean_length": 647.9765625, "completions/mean_terminated_length": 561.1842041015625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.031826587100198915, "grad_norm": 0.11409888416528702, "learning_rate": 2e-07, "loss": 0.0237, "num_tokens": 216957787.0, "reward": 0.5223214626312256, "reward_std": 0.2000504583120346, "rewards/simpleverify_reward/mean": 0.5223214030265808, "rewards/simpleverify_reward/std": 0.49978047609329224, "step": 341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3645.0, "completions/mean_length": 642.5904541015625, "completions/mean_terminated_length": 587.7744140625, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.0319199202001995, "grad_norm": 0.11551409959793091, "learning_rate": 2e-07, "loss": 0.0351, "num_tokens": 217622556.0, "reward": 0.527901828289032, "reward_std": 0.20639783143997192, "rewards/simpleverify_reward/mean": 0.5279017686843872, "rewards/simpleverify_reward/std": 0.49949970841407776, "step": 342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3554.0, "completions/mean_length": 618.3203125, "completions/mean_terminated_length": 567.1200561523438, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.032013253300200085, "grad_norm": 0.1178201362490654, "learning_rate": 2e-07, "loss": 0.0023, "num_tokens": 218279035.0, "reward": 0.5412946939468384, "reward_std": 0.19674287736415863, "rewards/simpleverify_reward/mean": 0.5412946343421936, "rewards/simpleverify_reward/std": 0.49857014417648315, "step": 343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4023.0, "completions/mean_length": 611.3683471679688, "completions/mean_terminated_length": 583.9302978515625, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.032106586400200664, "grad_norm": 0.12041269987821579, "learning_rate": 2e-07, "loss": 0.0379, "num_tokens": 218906293.0, "reward": 0.5691964626312256, "reward_std": 0.21455349028110504, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652786254883, "step": 344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3509.0, "completions/mean_length": 629.0881958007812, "completions/mean_terminated_length": 574.057861328125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.03219991950020125, "grad_norm": 0.13019467890262604, "learning_rate": 2e-07, "loss": 0.023, "num_tokens": 219562996.0, "reward": 0.5870535969734192, "reward_std": 0.205877885222435, "rewards/simpleverify_reward/mean": 0.5870535969734192, "rewards/simpleverify_reward/std": 0.49263837933540344, "step": 345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2848.0, "completions/mean_length": 582.1217041015625, "completions/mean_terminated_length": 514.1626586914062, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.032293252600201834, "grad_norm": 0.1156524196267128, "learning_rate": 2e-07, "loss": 0.027, "num_tokens": 220174177.0, "reward": 0.4921875298023224, "reward_std": 0.16686852276325226, "rewards/simpleverify_reward/mean": 0.4921875, "rewards/simpleverify_reward/std": 0.5002182126045227, "step": 346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2938.0, "completions/mean_length": 636.0279541015625, "completions/mean_terminated_length": 557.0330810546875, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.03238658570020242, "grad_norm": 0.11778948456048965, "learning_rate": 2e-07, "loss": 0.0257, "num_tokens": 220834770.0, "reward": 0.5424107313156128, "reward_std": 0.21158403158187866, "rewards/simpleverify_reward/mean": 0.5424107313156128, "rewards/simpleverify_reward/std": 0.4984763562679291, "step": 347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3445.0, "completions/mean_length": 604.2366333007812, "completions/mean_terminated_length": 552.8289794921875, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.032479918800203, "grad_norm": 0.14455190300941467, "learning_rate": 2e-07, "loss": 0.0534, "num_tokens": 221458982.0, "reward": 0.6026785969734192, "reward_std": 0.24348536133766174, "rewards/simpleverify_reward/mean": 0.6026785969734192, "rewards/simpleverify_reward/std": 0.48961687088012695, "step": 348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2549.0, "completions/mean_length": 699.3783569335938, "completions/mean_terminated_length": 617.8594360351562, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 0.03257325190020358, "grad_norm": 0.10802318900823593, "learning_rate": 2e-07, "loss": 0.0302, "num_tokens": 222189569.0, "reward": 0.4933035969734192, "reward_std": 0.16871435940265656, "rewards/simpleverify_reward/mean": 0.4933035671710968, "rewards/simpleverify_reward/std": 0.5002344250679016, "step": 349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2875.0, "completions/mean_length": 638.2455444335938, "completions/mean_terminated_length": 559.3013305664062, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.03266658500020417, "grad_norm": 0.1078496053814888, "learning_rate": 2e-07, "loss": 0.0223, "num_tokens": 222854157.0, "reward": 0.5401785969734192, "reward_std": 0.20733851194381714, "rewards/simpleverify_reward/mean": 0.5401785969734192, "rewards/simpleverify_reward/std": 0.49866142868995667, "step": 350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4022.0, "completions/mean_length": 676.46875, "completions/mean_terminated_length": 606.364501953125, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.032759918100204746, "grad_norm": 0.10684423893690109, "learning_rate": 2e-07, "loss": 0.025, "num_tokens": 223561929.0, "reward": 0.512276828289032, "reward_std": 0.1731078028678894, "rewards/simpleverify_reward/mean": 0.5122767686843872, "rewards/simpleverify_reward/std": 0.500128448009491, "step": 351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2652.0, "completions/mean_length": 603.3895263671875, "completions/mean_terminated_length": 555.978515625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.03285325120020533, "grad_norm": 0.1365782916545868, "learning_rate": 2e-07, "loss": 0.0187, "num_tokens": 224193302.0, "reward": 0.6037946939468384, "reward_std": 0.24258677661418915, "rewards/simpleverify_reward/mean": 0.6037946343421936, "rewards/simpleverify_reward/std": 0.48938122391700745, "step": 352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3217.0, "completions/mean_length": 610.0625, "completions/mean_terminated_length": 558.7406616210938, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.03294658430020592, "grad_norm": 0.13101808726787567, "learning_rate": 2e-07, "loss": 0.0334, "num_tokens": 224829046.0, "reward": 0.5625, "reward_std": 0.21940521895885468, "rewards/simpleverify_reward/mean": 0.5625, "rewards/simpleverify_reward/std": 0.49635544419288635, "step": 353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3992.0, "completions/mean_length": 636.786865234375, "completions/mean_terminated_length": 597.7438354492188, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.0330399174002065, "grad_norm": 0.12078133225440979, "learning_rate": 2e-07, "loss": 0.0222, "num_tokens": 225489327.0, "reward": 0.5368303656578064, "reward_std": 0.20940300822257996, "rewards/simpleverify_reward/mean": 0.5368303656578064, "rewards/simpleverify_reward/std": 0.49892017245292664, "step": 354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2691.0, "completions/mean_length": 553.7589721679688, "completions/mean_terminated_length": 525.8673095703125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.03313325050020708, "grad_norm": 0.12745332717895508, "learning_rate": 2e-07, "loss": 0.026, "num_tokens": 226068135.0, "reward": 0.6171875, "reward_std": 0.22353512048721313, "rewards/simpleverify_reward/mean": 0.6171875, "rewards/simpleverify_reward/std": 0.4863446056842804, "step": 355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2309.0, "completions/mean_length": 618.7991333007812, "completions/mean_terminated_length": 575.5796508789062, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.033226583600207665, "grad_norm": 0.11167008429765701, "learning_rate": 2e-07, "loss": 0.0147, "num_tokens": 226714043.0, "reward": 0.5524553656578064, "reward_std": 0.19061489403247833, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 627.4252319335938, "completions/mean_terminated_length": 576.3590087890625, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.03331991670020825, "grad_norm": 0.11163613945245743, "learning_rate": 2e-07, "loss": 0.0218, "num_tokens": 227356312.0, "reward": 0.5881696939468384, "reward_std": 0.179983988404274, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924396276473999, "step": 357 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3501.0, "completions/mean_length": 585.3125, "completions/mean_terminated_length": 541.6768188476562, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 0.033413249800208836, "grad_norm": 0.1179451048374176, "learning_rate": 2e-07, "loss": 0.0289, "num_tokens": 227979384.0, "reward": 0.5491071939468384, "reward_std": 0.18896450102329254, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3898.0, "completions/mean_length": 625.2154541015625, "completions/mean_terminated_length": 554.0603637695312, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.033506582900209414, "grad_norm": 0.12194036692380905, "learning_rate": 2e-07, "loss": 0.009, "num_tokens": 228624513.0, "reward": 0.6395089626312256, "reward_std": 0.20624810457229614, "rewards/simpleverify_reward/mean": 0.6395089030265808, "rewards/simpleverify_reward/std": 0.4804111123085022, "step": 359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3665.0, "completions/mean_length": 549.2020263671875, "completions/mean_terminated_length": 517.2489013671875, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.03359991600021, "grad_norm": 0.12819644808769226, "learning_rate": 2e-07, "loss": 0.0136, "num_tokens": 229201094.0, "reward": 0.6082589626312256, "reward_std": 0.19355478882789612, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.4884119927883148, "step": 360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3068.0, "completions/mean_length": 668.5335083007812, "completions/mean_terminated_length": 594.2781982421875, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.033693249100210584, "grad_norm": 0.11762277781963348, "learning_rate": 2e-07, "loss": 0.0488, "num_tokens": 229886228.0, "reward": 0.546875, "reward_std": 0.195279061794281, "rewards/simpleverify_reward/mean": 0.546875, "rewards/simpleverify_reward/std": 0.4980759024620056, "step": 361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3836.0, "completions/mean_length": 640.9721069335938, "completions/mean_terminated_length": 582.1464233398438, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.03378658220021117, "grad_norm": 0.12857232987880707, "learning_rate": 2e-07, "loss": 0.0073, "num_tokens": 230551875.0, "reward": 0.4877232313156128, "reward_std": 0.23398281633853912, "rewards/simpleverify_reward/mean": 0.4877232015132904, "rewards/simpleverify_reward/std": 0.500128448009491, "step": 362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3864.0, "completions/mean_length": 645.7310791015625, "completions/mean_terminated_length": 594.934326171875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.03387991530021175, "grad_norm": 0.12041091918945312, "learning_rate": 2e-07, "loss": 0.0375, "num_tokens": 231221266.0, "reward": 0.535714328289032, "reward_std": 0.1841956079006195, "rewards/simpleverify_reward/mean": 0.5357142686843872, "rewards/simpleverify_reward/std": 0.4990014135837555, "step": 363 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3901.0, "completions/mean_length": 603.4910888671875, "completions/mean_terminated_length": 539.9909057617188, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.03397324840021233, "grad_norm": 0.11769693344831467, "learning_rate": 2e-07, "loss": 0.0287, "num_tokens": 231845914.0, "reward": 0.5892857313156128, "reward_std": 0.2105405032634735, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3906.0, "completions/mean_length": 545.7210083007812, "completions/mean_terminated_length": 525.7979736328125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.03406658150021292, "grad_norm": 0.129452645778656, "learning_rate": 2e-07, "loss": 0.0324, "num_tokens": 232418448.0, "reward": 0.6283482313156128, "reward_std": 0.20184919238090515, "rewards/simpleverify_reward/mean": 0.6283482313156128, "rewards/simpleverify_reward/std": 0.4835159480571747, "step": 365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3574.0, "completions/mean_length": 601.1361694335938, "completions/mean_terminated_length": 561.6907348632812, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.034159914600213497, "grad_norm": 0.12081297487020493, "learning_rate": 2e-07, "loss": 0.0292, "num_tokens": 233049658.0, "reward": 0.520089328289032, "reward_std": 0.20196372270584106, "rewards/simpleverify_reward/mean": 0.5200892686843872, "rewards/simpleverify_reward/std": 0.4998753070831299, "step": 366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3753.0, "completions/mean_length": 620.8527221679688, "completions/mean_terminated_length": 581.6298217773438, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.03425324770021408, "grad_norm": 0.11100828647613525, "learning_rate": 2e-07, "loss": 0.0179, "num_tokens": 233697102.0, "reward": 0.4933035969734192, "reward_std": 0.19700434803962708, "rewards/simpleverify_reward/mean": 0.4933035671710968, "rewards/simpleverify_reward/std": 0.5002344250679016, "step": 367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3903.0, "completions/mean_length": 625.7745971679688, "completions/mean_terminated_length": 574.6840209960938, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.03434658080021467, "grad_norm": 0.11460910737514496, "learning_rate": 2e-07, "loss": 0.027, "num_tokens": 234345868.0, "reward": 0.546875, "reward_std": 0.20095311105251312, "rewards/simpleverify_reward/mean": 0.546875, "rewards/simpleverify_reward/std": 0.4980759024620056, "step": 368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3440.0, "completions/mean_length": 593.9375, "completions/mean_terminated_length": 534.31103515625, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.03443991390021525, "grad_norm": 0.11281440407037735, "learning_rate": 2e-07, "loss": 0.0449, "num_tokens": 234966412.0, "reward": 0.5837053656578064, "reward_std": 0.1762627214193344, "rewards/simpleverify_reward/mean": 0.5837053656578064, "rewards/simpleverify_reward/std": 0.49321892857551575, "step": 369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3516.0, "completions/mean_length": 637.96875, "completions/mean_terminated_length": 579.0919799804688, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.03453324700021583, "grad_norm": 0.10753630101680756, "learning_rate": 2e-07, "loss": 0.0096, "num_tokens": 235620832.0, "reward": 0.5290178656578064, "reward_std": 0.168148010969162, "rewards/simpleverify_reward/mean": 0.5290178656578064, "rewards/simpleverify_reward/std": 0.49943605065345764, "step": 370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3921.0, "completions/mean_length": 645.1897583007812, "completions/mean_terminated_length": 606.2415771484375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.034626580100216416, "grad_norm": 0.11359869688749313, "learning_rate": 2e-07, "loss": 0.022, "num_tokens": 236280506.0, "reward": 0.5580357313156128, "reward_std": 0.1940758377313614, "rewards/simpleverify_reward/mean": 0.5580357313156128, "rewards/simpleverify_reward/std": 0.49689781665802, "step": 371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2040.0, "completions/mean_length": 539.7020263671875, "completions/mean_terminated_length": 519.7452392578125, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.034719913200217, "grad_norm": 0.13594967126846313, "learning_rate": 2e-07, "loss": 0.0096, "num_tokens": 236845831.0, "reward": 0.6361607313156128, "reward_std": 0.20670275390148163, "rewards/simpleverify_reward/mean": 0.6361607313156128, "rewards/simpleverify_reward/std": 0.4813718795776367, "step": 372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3447.0, "completions/mean_length": 644.875, "completions/mean_terminated_length": 570.107177734375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.034813246300217586, "grad_norm": 0.11896143853664398, "learning_rate": 2e-07, "loss": 0.0294, "num_tokens": 237529927.0, "reward": 0.4754464626312256, "reward_std": 0.1999749094247818, "rewards/simpleverify_reward/mean": 0.4754464328289032, "rewards/simpleverify_reward/std": 0.4996756315231323, "step": 373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2234.0, "completions/mean_length": 607.2467041015625, "completions/mean_terminated_length": 555.8833618164062, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.034906579400218164, "grad_norm": 0.12993580102920532, "learning_rate": 2e-07, "loss": 0.033, "num_tokens": 238153180.0, "reward": 0.59375, "reward_std": 0.22980578243732452, "rewards/simpleverify_reward/mean": 0.59375, "rewards/simpleverify_reward/std": 0.4914066195487976, "step": 374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 597.638427734375, "completions/mean_terminated_length": 529.9794921875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.03499991250021875, "grad_norm": 0.12314239889383316, "learning_rate": 2e-07, "loss": 0.0219, "num_tokens": 238785248.0, "reward": 0.5691964626312256, "reward_std": 0.19738410413265228, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652786254883, "step": 375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2365.0, "completions/mean_length": 542.2913208007812, "completions/mean_terminated_length": 514.3093872070312, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.035093245600219335, "grad_norm": 0.13315580785274506, "learning_rate": 2e-07, "loss": 0.0243, "num_tokens": 239365637.0, "reward": 0.6328125, "reward_std": 0.19918283820152283, "rewards/simpleverify_reward/mean": 0.6328125, "rewards/simpleverify_reward/std": 0.48230743408203125, "step": 376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3620.0, "completions/mean_length": 597.3359375, "completions/mean_terminated_length": 549.8427734375, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.03518657870021991, "grad_norm": 0.12454646080732346, "learning_rate": 2e-07, "loss": 0.0192, "num_tokens": 239981106.0, "reward": 0.559151828289032, "reward_std": 0.21550032496452332, "rewards/simpleverify_reward/mean": 0.5591517686843872, "rewards/simpleverify_reward/std": 0.496766060590744, "step": 377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4026.0, "completions/mean_length": 641.6986694335938, "completions/mean_terminated_length": 598.7638549804688, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.0352799118002205, "grad_norm": 0.127675399184227, "learning_rate": 2e-07, "loss": 0.0358, "num_tokens": 240649660.0, "reward": 0.5111607313156128, "reward_std": 0.2328135073184967, "rewards/simpleverify_reward/mean": 0.5111607313156128, "rewards/simpleverify_reward/std": 0.5001546144485474, "step": 378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3290.0, "completions/mean_length": 615.8560791015625, "completions/mean_terminated_length": 572.6000366210938, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 0.03537324490022108, "grad_norm": 0.12915608286857605, "learning_rate": 2e-07, "loss": 0.0225, "num_tokens": 241283739.0, "reward": 0.5535714626312256, "reward_std": 0.21485769748687744, "rewards/simpleverify_reward/mean": 0.5535714030265808, "rewards/simpleverify_reward/std": 0.4973995089530945, "step": 379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3203.0, "completions/mean_length": 611.125, "completions/mean_terminated_length": 555.8095092773438, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.03546657800022167, "grad_norm": 0.11892613023519516, "learning_rate": 2e-07, "loss": 0.0291, "num_tokens": 241924723.0, "reward": 0.5691964626312256, "reward_std": 0.20057222247123718, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652488231659, "step": 380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3878.0, "completions/mean_length": 633.2801513671875, "completions/mean_terminated_length": 537.9758911132812, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.03555991110022225, "grad_norm": 0.11406309902667999, "learning_rate": 2e-07, "loss": 0.0385, "num_tokens": 242578966.0, "reward": 0.598214328289032, "reward_std": 0.1939634382724762, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053287506103516, "step": 381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 623.4475708007812, "completions/mean_terminated_length": 568.3276977539062, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.03565324420022283, "grad_norm": 0.1302098035812378, "learning_rate": 2e-07, "loss": 0.036, "num_tokens": 243224551.0, "reward": 0.527901828289032, "reward_std": 0.24006874859333038, "rewards/simpleverify_reward/mean": 0.5279017686843872, "rewards/simpleverify_reward/std": 0.49949970841407776, "step": 382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3817.0, "completions/mean_length": 587.0100708007812, "completions/mean_terminated_length": 547.4052124023438, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.03574657730022342, "grad_norm": 0.1196148693561554, "learning_rate": 2e-07, "loss": 0.0102, "num_tokens": 243845032.0, "reward": 0.5915178656578064, "reward_std": 0.1888582408428192, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3744.0, "completions/mean_length": 647.6953125, "completions/mean_terminated_length": 596.927490234375, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.035839910400224, "grad_norm": 0.1145508885383606, "learning_rate": 2e-07, "loss": 0.0147, "num_tokens": 244520607.0, "reward": 0.5189732313156128, "reward_std": 0.19305625557899475, "rewards/simpleverify_reward/mean": 0.5189732313156128, "rewards/simpleverify_reward/std": 0.49991893768310547, "step": 384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3821.0, "completions/mean_length": 659.2846069335938, "completions/mean_terminated_length": 600.770751953125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.03593324350022458, "grad_norm": 0.10823110491037369, "learning_rate": 2e-07, "loss": 0.0434, "num_tokens": 245206102.0, "reward": 0.504464328289032, "reward_std": 0.1890425831079483, "rewards/simpleverify_reward/mean": 0.5044642686843872, "rewards/simpleverify_reward/std": 0.5002593398094177, "step": 385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3713.0, "completions/mean_length": 613.5803833007812, "completions/mean_terminated_length": 574.275390625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.036026576600225166, "grad_norm": 0.11951193958520889, "learning_rate": 2e-07, "loss": 0.0215, "num_tokens": 245841342.0, "reward": 0.5613839626312256, "reward_std": 0.18528710305690765, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3800.0, "completions/mean_length": 572.9107666015625, "completions/mean_terminated_length": 545.1698608398438, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.03611990970022575, "grad_norm": 0.11949802935123444, "learning_rate": 2e-07, "loss": 0.0095, "num_tokens": 246442606.0, "reward": 0.5959821939468384, "reward_std": 0.17713145911693573, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3528.0, "completions/mean_length": 622.3995971679688, "completions/mean_terminated_length": 567.2630615234375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.03621324280022633, "grad_norm": 0.12519963085651398, "learning_rate": 2e-07, "loss": 0.0457, "num_tokens": 247093908.0, "reward": 0.5625, "reward_std": 0.21838465332984924, "rewards/simpleverify_reward/mean": 0.5625, "rewards/simpleverify_reward/std": 0.49635544419288635, "step": 388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2839.0, "completions/mean_length": 566.5881958007812, "completions/mean_terminated_length": 526.7528076171875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.036306575900226914, "grad_norm": 0.13447214663028717, "learning_rate": 2e-07, "loss": 0.02, "num_tokens": 247694971.0, "reward": 0.5491071939468384, "reward_std": 0.22030283510684967, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3600.0, "completions/mean_length": 684.9888916015625, "completions/mean_terminated_length": 599.1281127929688, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.0363999090002275, "grad_norm": 0.11758590489625931, "learning_rate": 2e-07, "loss": 0.0178, "num_tokens": 248399153.0, "reward": 0.5524553656578064, "reward_std": 0.20857815444469452, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3412.0, "completions/mean_length": 613.9464721679688, "completions/mean_terminated_length": 546.6029663085938, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.036493242100228085, "grad_norm": 0.12281692773103714, "learning_rate": 2e-07, "loss": 0.0527, "num_tokens": 249026697.0, "reward": 0.590401828289032, "reward_std": 0.20414192974567413, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1998.0, "completions/mean_length": 577.21875, "completions/mean_terminated_length": 545.5180053710938, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.03658657520022866, "grad_norm": 0.11935383826494217, "learning_rate": 2e-07, "loss": 0.0158, "num_tokens": 249620813.0, "reward": 0.6194196939468384, "reward_std": 0.18904046714305878, "rewards/simpleverify_reward/mean": 0.6194196343421936, "rewards/simpleverify_reward/std": 0.48580074310302734, "step": 392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2237.0, "completions/mean_length": 652.7545166015625, "completions/mean_terminated_length": 598.0997924804688, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "epoch": 0.03667990830022925, "grad_norm": 0.13890476524829865, "learning_rate": 2e-07, "loss": 0.0208, "num_tokens": 250305993.0, "reward": 0.5167410969734192, "reward_std": 0.23578616976737976, "rewards/simpleverify_reward/mean": 0.5167410969734192, "rewards/simpleverify_reward/std": 0.4999987483024597, "step": 393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2794.0, "completions/mean_length": 579.911865234375, "completions/mean_terminated_length": 552.2261352539062, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.036773241400229834, "grad_norm": 0.12993817031383514, "learning_rate": 2e-07, "loss": 0.0231, "num_tokens": 250917914.0, "reward": 0.5658482313156128, "reward_std": 0.23601117730140686, "rewards/simpleverify_reward/mean": 0.5658482313156128, "rewards/simpleverify_reward/std": 0.49592188000679016, "step": 394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2314.0, "completions/mean_length": 593.5714721679688, "completions/mean_terminated_length": 554.0406494140625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.03686657450023042, "grad_norm": 0.11943305283784866, "learning_rate": 2e-07, "loss": 0.0244, "num_tokens": 251530826.0, "reward": 0.6350446939468384, "reward_std": 0.2246951460838318, "rewards/simpleverify_reward/mean": 0.6350446343421936, "rewards/simpleverify_reward/std": 0.4816865026950836, "step": 395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3128.0, "completions/mean_length": 629.7433471679688, "completions/mean_terminated_length": 562.705322265625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.036959907600231, "grad_norm": 0.12212429195642471, "learning_rate": 2e-07, "loss": 0.0406, "num_tokens": 252188636.0, "reward": 0.5691964626312256, "reward_std": 0.21315565705299377, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652488231659, "step": 396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2880.0, "completions/mean_length": 615.9553833007812, "completions/mean_terminated_length": 544.6104736328125, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.03705324070023158, "grad_norm": 0.12034222483634949, "learning_rate": 2e-07, "loss": 0.0136, "num_tokens": 252832380.0, "reward": 0.5625, "reward_std": 0.20275254547595978, "rewards/simpleverify_reward/mean": 0.5625, "rewards/simpleverify_reward/std": 0.49635544419288635, "step": 397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3097.0, "completions/mean_length": 633.0725708007812, "completions/mean_terminated_length": 566.0989379882812, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 0.03714657380023217, "grad_norm": 0.11985801160335541, "learning_rate": 2e-07, "loss": 0.0252, "num_tokens": 253486469.0, "reward": 0.5491071939468384, "reward_std": 0.22762800753116608, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 1899.0, "completions/mean_length": 580.466552734375, "completions/mean_terminated_length": 552.78515625, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.03723990690023275, "grad_norm": 0.13493165373802185, "learning_rate": 2e-07, "loss": 0.0248, "num_tokens": 254087799.0, "reward": 0.5714285969734192, "reward_std": 0.2492361068725586, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514803290367126, "step": 399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3915.0, "completions/mean_length": 601.4967041015625, "completions/mean_terminated_length": 550.0487060546875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.03733324000023333, "grad_norm": 0.13114814460277557, "learning_rate": 2e-07, "loss": 0.0273, "num_tokens": 254713996.0, "reward": 0.590401828289032, "reward_std": 0.225184828042984, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2844.0, "completions/mean_length": 623.2611694335938, "completions/mean_terminated_length": 576.1199340820312, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.037426573100233916, "grad_norm": 0.12361971288919449, "learning_rate": 2e-07, "loss": 0.0207, "num_tokens": 255361222.0, "reward": 0.5145089626312256, "reward_std": 0.24314947426319122, "rewards/simpleverify_reward/mean": 0.5145089030265808, "rewards/simpleverify_reward/std": 0.5000685453414917, "step": 401 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4052.0, "completions/mean_length": 598.7098388671875, "completions/mean_terminated_length": 567.2026977539062, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.0375199062002345, "grad_norm": 0.12858903408050537, "learning_rate": 2e-07, "loss": 0.0456, "num_tokens": 255983666.0, "reward": 0.5803571939468384, "reward_std": 0.24134711921215057, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761425971985, "step": 402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2457.0, "completions/mean_length": 570.5546875, "completions/mean_terminated_length": 542.7952880859375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.03761323930023508, "grad_norm": 0.13296374678611755, "learning_rate": 2e-07, "loss": 0.0118, "num_tokens": 256594819.0, "reward": 0.5334821939468384, "reward_std": 0.2088077813386917, "rewards/simpleverify_reward/mean": 0.5334821343421936, "rewards/simpleverify_reward/std": 0.49915632605552673, "step": 403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2252.0, "completions/mean_length": 613.0792846679688, "completions/mean_terminated_length": 561.8018188476562, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.037706572400235665, "grad_norm": 0.13253577053546906, "learning_rate": 2e-07, "loss": 0.022, "num_tokens": 257234538.0, "reward": 0.5379464626312256, "reward_std": 0.23570625483989716, "rewards/simpleverify_reward/mean": 0.5379464030265808, "rewards/simpleverify_reward/std": 0.4988364279270172, "step": 404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2303.0, "completions/mean_length": 632.2801513671875, "completions/mean_terminated_length": 577.3004760742188, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.03779990550023625, "grad_norm": 0.13328397274017334, "learning_rate": 2e-07, "loss": 0.0225, "num_tokens": 257894781.0, "reward": 0.5424107313156128, "reward_std": 0.2489316165447235, "rewards/simpleverify_reward/mean": 0.5424107313156128, "rewards/simpleverify_reward/std": 0.4984763264656067, "step": 405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3807.0, "completions/mean_length": 613.2366333007812, "completions/mean_terminated_length": 549.9136352539062, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.037893238600236835, "grad_norm": 0.11163464933633804, "learning_rate": 2e-07, "loss": 0.0253, "num_tokens": 258535249.0, "reward": 0.5859375, "reward_std": 0.18239323794841766, "rewards/simpleverify_reward/mean": 0.5859375, "rewards/simpleverify_reward/std": 0.4928344786167145, "step": 406 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2798.0, "completions/mean_length": 642.3917846679688, "completions/mean_terminated_length": 571.5888671875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.03798657170023741, "grad_norm": 0.10799577832221985, "learning_rate": 2e-07, "loss": 0.0076, "num_tokens": 259200656.0, "reward": 0.5133928656578064, "reward_std": 0.18866391479969025, "rewards/simpleverify_reward/mean": 0.5133928656578064, "rewards/simpleverify_reward/std": 0.500099778175354, "step": 407 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3612.0, "completions/mean_length": 627.4933471679688, "completions/mean_terminated_length": 596.2454833984375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.038079904800238, "grad_norm": 0.10824349522590637, "learning_rate": 2e-07, "loss": 0.0107, "num_tokens": 259861826.0, "reward": 0.5379464626312256, "reward_std": 0.1925348937511444, "rewards/simpleverify_reward/mean": 0.5379464030265808, "rewards/simpleverify_reward/std": 0.4988364577293396, "step": 408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3415.0, "completions/mean_length": 629.372802734375, "completions/mean_terminated_length": 602.0765380859375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.038173237900238584, "grad_norm": 0.11919102817773819, "learning_rate": 2e-07, "loss": 0.0262, "num_tokens": 260510584.0, "reward": 0.5491071939468384, "reward_std": 0.20372965931892395, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4076.0, "completions/mean_length": 642.3326416015625, "completions/mean_terminated_length": 595.4502563476562, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.03826657100023917, "grad_norm": 0.127053901553154, "learning_rate": 2e-07, "loss": 0.0192, "num_tokens": 261174154.0, "reward": 0.5390625, "reward_std": 0.23961368203163147, "rewards/simpleverify_reward/mean": 0.5390625, "rewards/simpleverify_reward/std": 0.4987502098083496, "step": 410 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4053.0, "completions/mean_length": 557.8170166015625, "completions/mean_terminated_length": 521.9165649414062, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.03835990410023975, "grad_norm": 0.14120900630950928, "learning_rate": 2e-07, "loss": 0.0288, "num_tokens": 261763950.0, "reward": 0.5602678656578064, "reward_std": 0.2414223849773407, "rewards/simpleverify_reward/mean": 0.5602678656578064, "rewards/simpleverify_reward/std": 0.4966317415237427, "step": 411 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2870.0, "completions/mean_length": 589.4230346679688, "completions/mean_terminated_length": 533.7630615234375, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "epoch": 0.03845323720024033, "grad_norm": 0.13012653589248657, "learning_rate": 2e-07, "loss": 0.0351, "num_tokens": 262389929.0, "reward": 0.5959821939468384, "reward_std": 0.23168332874774933, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2672.0, "completions/mean_length": 591.3404541015625, "completions/mean_terminated_length": 547.7796630859375, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.03854657030024092, "grad_norm": 0.11305096000432968, "learning_rate": 2e-07, "loss": 0.0296, "num_tokens": 263007122.0, "reward": 0.5725446939468384, "reward_std": 0.1773906648159027, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 413 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3755.0, "completions/mean_length": 662.0402221679688, "completions/mean_terminated_length": 603.5732421875, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.038639903400241496, "grad_norm": 0.1126675084233284, "learning_rate": 2e-07, "loss": 0.0223, "num_tokens": 263694358.0, "reward": 0.4988839626312256, "reward_std": 0.20947858691215515, "rewards/simpleverify_reward/mean": 0.4988839328289032, "rewards/simpleverify_reward/std": 0.5002779960632324, "step": 414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2664.0, "completions/mean_length": 633.8538208007812, "completions/mean_terminated_length": 586.8563842773438, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.03873323650024208, "grad_norm": 0.11877680569887161, "learning_rate": 2e-07, "loss": 0.0391, "num_tokens": 264342411.0, "reward": 0.5859375, "reward_std": 0.2268359214067459, "rewards/simpleverify_reward/mean": 0.5859375, "rewards/simpleverify_reward/std": 0.4928344786167145, "step": 415 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3732.0, "completions/mean_length": 655.318115234375, "completions/mean_terminated_length": 600.7041015625, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.038826569600242666, "grad_norm": 0.12163511663675308, "learning_rate": 2e-07, "loss": 0.0236, "num_tokens": 265023448.0, "reward": 0.5100446939468384, "reward_std": 0.21350152790546417, "rewards/simpleverify_reward/mean": 0.5100446343421936, "rewards/simpleverify_reward/std": 0.5001782774925232, "step": 416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2697.0, "completions/mean_length": 608.2154541015625, "completions/mean_terminated_length": 548.83203125, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.03891990270024325, "grad_norm": 0.1318332850933075, "learning_rate": 2e-07, "loss": 0.0048, "num_tokens": 265657625.0, "reward": 0.5758928656578064, "reward_std": 0.22894887626171112, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49448275566101074, "step": 417 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2882.0, "completions/mean_length": 654.6127319335938, "completions/mean_terminated_length": 572.0194091796875, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.03901323580024383, "grad_norm": 0.11921422928571701, "learning_rate": 2e-07, "loss": 0.034, "num_tokens": 266328942.0, "reward": 0.5502232313156128, "reward_std": 0.219589963555336, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2200.0, "completions/mean_length": 633.4498291015625, "completions/mean_terminated_length": 566.4834594726562, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.039106568900244415, "grad_norm": 0.12059503048658371, "learning_rate": 2e-07, "loss": 0.0128, "num_tokens": 266984345.0, "reward": 0.5167410969734192, "reward_std": 0.19652535021305084, "rewards/simpleverify_reward/mean": 0.5167410969734192, "rewards/simpleverify_reward/std": 0.4999987483024597, "step": 419 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2700.0, "completions/mean_length": 571.4397583007812, "completions/mean_terminated_length": 527.6316528320312, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.039199902000245, "grad_norm": 0.1276574283838272, "learning_rate": 2e-07, "loss": 0.0139, "num_tokens": 267580307.0, "reward": 0.5758928656578064, "reward_std": 0.20046092569828033, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49448272585868835, "step": 420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3951.0, "completions/mean_length": 681.7455444335938, "completions/mean_terminated_length": 643.2099609375, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.039293235100245585, "grad_norm": 0.11714023351669312, "learning_rate": 2e-07, "loss": 0.012, "num_tokens": 268289431.0, "reward": 0.4609375298023224, "reward_std": 0.2174505889415741, "rewards/simpleverify_reward/mean": 0.4609375, "rewards/simpleverify_reward/std": 0.4987502098083496, "step": 421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2351.0, "completions/mean_length": 604.0926513671875, "completions/mean_terminated_length": 560.6904296875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.039386568200246164, "grad_norm": 0.12781652808189392, "learning_rate": 2e-07, "loss": 0.0216, "num_tokens": 268922498.0, "reward": 0.5479910969734192, "reward_std": 0.21496829390525818, "rewards/simpleverify_reward/mean": 0.5479910969734192, "rewards/simpleverify_reward/std": 0.49796950817108154, "step": 422 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3330.0, "completions/mean_length": 639.5033569335938, "completions/mean_terminated_length": 576.657958984375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.03947990130024675, "grad_norm": 0.12274286150932312, "learning_rate": 2e-07, "loss": 0.0221, "num_tokens": 269582061.0, "reward": 0.5770089626312256, "reward_std": 0.22620512545108795, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099319934845, "step": 423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2296.0, "completions/mean_length": 593.060302734375, "completions/mean_terminated_length": 553.5237426757812, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.039573234400247334, "grad_norm": 0.12744764983654022, "learning_rate": 2e-07, "loss": 0.0281, "num_tokens": 270203035.0, "reward": 0.6149553656578064, "reward_std": 0.21553239226341248, "rewards/simpleverify_reward/mean": 0.6149553656578064, "rewards/simpleverify_reward/std": 0.4868776500225067, "step": 424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2315.0, "completions/mean_length": 627.9799194335938, "completions/mean_terminated_length": 560.9078369140625, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.03966656750024792, "grad_norm": 0.10817217081785202, "learning_rate": 2e-07, "loss": 0.012, "num_tokens": 270853249.0, "reward": 0.5446428656578064, "reward_std": 0.17998720705509186, "rewards/simpleverify_reward/mean": 0.5446428656578064, "rewards/simpleverify_reward/std": 0.4982811510562897, "step": 425 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4009.0, "completions/mean_length": 580.052490234375, "completions/mean_terminated_length": 548.3772583007812, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.0397599006002485, "grad_norm": 0.13167676329612732, "learning_rate": 2e-07, "loss": 0.0397, "num_tokens": 271463816.0, "reward": 0.5703125, "reward_std": 0.20290662348270416, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3991.0, "completions/mean_length": 697.5859985351562, "completions/mean_terminated_length": 616.0239868164062, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.03985323370024908, "grad_norm": 0.11830423772335052, "learning_rate": 2e-07, "loss": 0.0306, "num_tokens": 272172949.0, "reward": 0.5569196939468384, "reward_std": 0.21594540774822235, "rewards/simpleverify_reward/mean": 0.5569196343421936, "rewards/simpleverify_reward/std": 0.49702703952789307, "step": 427 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2488.0, "completions/mean_length": 544.1261596679688, "completions/mean_terminated_length": 524.1942138671875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.03994656680024967, "grad_norm": 0.14353767037391663, "learning_rate": 2e-07, "loss": 0.0398, "num_tokens": 272750750.0, "reward": 0.6417410969734192, "reward_std": 0.23180390894412994, "rewards/simpleverify_reward/mean": 0.6417410969734192, "rewards/simpleverify_reward/std": 0.47975656390190125, "step": 428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2435.0, "completions/mean_length": 607.7623291015625, "completions/mean_terminated_length": 548.3712158203125, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.040039899900250246, "grad_norm": 0.13013318181037903, "learning_rate": 2e-07, "loss": 0.0044, "num_tokens": 273389985.0, "reward": 0.5948660969734192, "reward_std": 0.23714053630828857, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.49119213223457336, "step": 429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2917.0, "completions/mean_length": 574.6295166015625, "completions/mean_terminated_length": 534.8848876953125, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.04013323300025083, "grad_norm": 0.126933291554451, "learning_rate": 2e-07, "loss": 0.0083, "num_tokens": 273996541.0, "reward": 0.590401828289032, "reward_std": 0.20278392732143402, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 430 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3934.0, "completions/mean_length": 632.2567138671875, "completions/mean_terminated_length": 577.2766723632812, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.04022656610025142, "grad_norm": 0.11871017515659332, "learning_rate": 2e-07, "loss": 0.0054, "num_tokens": 274666563.0, "reward": 0.5212053656578064, "reward_std": 0.2029040902853012, "rewards/simpleverify_reward/mean": 0.5212053656578064, "rewards/simpleverify_reward/std": 0.49982914328575134, "step": 431 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2926.0, "completions/mean_length": 540.9207763671875, "completions/mean_terminated_length": 520.9708251953125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.040319899200252, "grad_norm": 0.1297157108783722, "learning_rate": 2e-07, "loss": 0.013, "num_tokens": 275238868.0, "reward": 0.629464328289032, "reward_std": 0.20320789515972137, "rewards/simpleverify_reward/mean": 0.6294642686843872, "rewards/simpleverify_reward/std": 0.4832179844379425, "step": 432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3069.0, "completions/mean_length": 604.0234375, "completions/mean_terminated_length": 556.62109375, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.04041323230025258, "grad_norm": 0.1147371381521225, "learning_rate": 2e-07, "loss": 0.0084, "num_tokens": 275870105.0, "reward": 0.535714328289032, "reward_std": 0.20665673911571503, "rewards/simpleverify_reward/mean": 0.5357142686843872, "rewards/simpleverify_reward/std": 0.4990014135837555, "step": 433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2180.0, "completions/mean_length": 596.2176513671875, "completions/mean_terminated_length": 552.717529296875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.040506565400253165, "grad_norm": 0.11517712473869324, "learning_rate": 2e-07, "loss": 0.0219, "num_tokens": 276489796.0, "reward": 0.5613839626312256, "reward_std": 0.19271966814994812, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4009.0, "completions/mean_length": 616.0848388671875, "completions/mean_terminated_length": 548.78271484375, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 0.04059989850025375, "grad_norm": 0.12425091117620468, "learning_rate": 2e-07, "loss": 0.0158, "num_tokens": 277132496.0, "reward": 0.5412946939468384, "reward_std": 0.22891634702682495, "rewards/simpleverify_reward/mean": 0.5412946343421936, "rewards/simpleverify_reward/std": 0.49857014417648315, "step": 435 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2725.0, "completions/mean_length": 544.9342041015625, "completions/mean_terminated_length": 525.0067749023438, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.040693231600254336, "grad_norm": 0.12877397239208221, "learning_rate": 2e-07, "loss": 0.0319, "num_tokens": 277711509.0, "reward": 0.6383928656578064, "reward_std": 0.20110537111759186, "rewards/simpleverify_reward/mean": 0.6383928656578064, "rewards/simpleverify_reward/std": 0.4807341992855072, "step": 436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2900.0, "completions/mean_length": 608.052490234375, "completions/mean_terminated_length": 548.6663208007812, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.040786564700254914, "grad_norm": 0.1287236213684082, "learning_rate": 2e-07, "loss": 0.0075, "num_tokens": 278336012.0, "reward": 0.546875, "reward_std": 0.21638864278793335, "rewards/simpleverify_reward/mean": 0.546875, "rewards/simpleverify_reward/std": 0.4980759024620056, "step": 437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3047.0, "completions/mean_length": 611.9553833007812, "completions/mean_terminated_length": 544.5733642578125, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.0408798978002555, "grad_norm": 0.1188107505440712, "learning_rate": 2e-07, "loss": 0.0598, "num_tokens": 278978028.0, "reward": 0.5446428656578064, "reward_std": 0.20693056285381317, "rewards/simpleverify_reward/mean": 0.5446428656578064, "rewards/simpleverify_reward/std": 0.4982811510562897, "step": 438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2151.0, "completions/mean_length": 624.193115234375, "completions/mean_terminated_length": 585.0079345703125, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.040973230900256084, "grad_norm": 0.11431073397397995, "learning_rate": 2e-07, "loss": 0.0341, "num_tokens": 279632361.0, "reward": 0.546875, "reward_std": 0.1900949627161026, "rewards/simpleverify_reward/mean": 0.546875, "rewards/simpleverify_reward/std": 0.4980759024620056, "step": 439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3044.0, "completions/mean_length": 620.794677734375, "completions/mean_terminated_length": 557.6090698242188, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.04106656400025666, "grad_norm": 0.11835186183452606, "learning_rate": 2e-07, "loss": 0.0301, "num_tokens": 280274985.0, "reward": 0.5658482313156128, "reward_std": 0.1947159320116043, "rewards/simpleverify_reward/mean": 0.5658482313156128, "rewards/simpleverify_reward/std": 0.49592188000679016, "step": 440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3674.0, "completions/mean_length": 619.9364013671875, "completions/mean_terminated_length": 564.7608032226562, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.04115989710025725, "grad_norm": 0.13078753650188446, "learning_rate": 2e-07, "loss": 0.0226, "num_tokens": 280928184.0, "reward": 0.5524553656578064, "reward_std": 0.21906821429729462, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3995.0, "completions/mean_length": 620.1295166015625, "completions/mean_terminated_length": 552.9055786132812, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.04125323020025783, "grad_norm": 0.11036499589681625, "learning_rate": 2e-07, "loss": 0.0378, "num_tokens": 281569580.0, "reward": 0.520089328289032, "reward_std": 0.17138101160526276, "rewards/simpleverify_reward/mean": 0.5200892686843872, "rewards/simpleverify_reward/std": 0.4998753070831299, "step": 442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3383.0, "completions/mean_length": 709.4620971679688, "completions/mean_terminated_length": 596.1868286132812, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.04134656330025842, "grad_norm": 0.11624077707529068, "learning_rate": 2e-07, "loss": 0.0315, "num_tokens": 282299266.0, "reward": 0.543526828289032, "reward_std": 0.21782150864601135, "rewards/simpleverify_reward/mean": 0.5435267686843872, "rewards/simpleverify_reward/std": 0.49838000535964966, "step": 443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2491.0, "completions/mean_length": 641.9096069335938, "completions/mean_terminated_length": 575.10693359375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.041439896400258996, "grad_norm": 0.1234375461935997, "learning_rate": 2e-07, "loss": 0.0556, "num_tokens": 282953841.0, "reward": 0.5870535969734192, "reward_std": 0.2403290718793869, "rewards/simpleverify_reward/mean": 0.5870535969734192, "rewards/simpleverify_reward/std": 0.49263837933540344, "step": 444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2064.0, "completions/mean_length": 639.4944458007812, "completions/mean_terminated_length": 552.488525390625, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.04153322950025958, "grad_norm": 0.11980868130922318, "learning_rate": 2e-07, "loss": 0.0358, "num_tokens": 283616156.0, "reward": 0.5569196939468384, "reward_std": 0.22634848952293396, "rewards/simpleverify_reward/mean": 0.5569196343421936, "rewards/simpleverify_reward/std": 0.4970270097255707, "step": 445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2245.0, "completions/mean_length": 630.1015625, "completions/mean_terminated_length": 579.07470703125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.04162656260026017, "grad_norm": 0.11323470622301102, "learning_rate": 2e-07, "loss": 0.0275, "num_tokens": 284256407.0, "reward": 0.5881696939468384, "reward_std": 0.217744842171669, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924395978450775, "step": 446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3080.0, "completions/mean_length": 688.5904541015625, "completions/mean_terminated_length": 602.8203125, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.04171989570026075, "grad_norm": 0.11070983111858368, "learning_rate": 2e-07, "loss": 0.0444, "num_tokens": 284963992.0, "reward": 0.5267857313156128, "reward_std": 0.1993369162082672, "rewards/simpleverify_reward/mean": 0.5267857313156128, "rewards/simpleverify_reward/std": 0.4995608627796173, "step": 447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2286.0, "completions/mean_length": 611.3092041015625, "completions/mean_terminated_length": 535.8141479492188, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.04181322880026133, "grad_norm": 0.12585975229740143, "learning_rate": 2e-07, "loss": 0.0388, "num_tokens": 285603189.0, "reward": 0.5703125, "reward_std": 0.2110169678926468, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3260.0, "completions/mean_length": 604.1808471679688, "completions/mean_terminated_length": 560.7796630859375, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.041906561900261916, "grad_norm": 0.11264684051275253, "learning_rate": 2e-07, "loss": 0.0016, "num_tokens": 286230807.0, "reward": 0.5078125, "reward_std": 0.17577417194843292, "rewards/simpleverify_reward/mean": 0.5078125, "rewards/simpleverify_reward/std": 0.5002182126045227, "step": 449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2473.0, "completions/mean_length": 628.6975708007812, "completions/mean_terminated_length": 553.5792236328125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.0419998950002625, "grad_norm": 0.1136738657951355, "learning_rate": 2e-07, "loss": 0.0242, "num_tokens": 286875576.0, "reward": 0.5803571939468384, "reward_std": 0.1789655089378357, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761425971985, "step": 450 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4020.0, "completions/mean_length": 668.0870971679688, "completions/mean_terminated_length": 585.817138671875, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.042093228100263086, "grad_norm": 0.1237485334277153, "learning_rate": 2e-07, "loss": 0.0475, "num_tokens": 287565030.0, "reward": 0.5334821939468384, "reward_std": 0.20846755802631378, "rewards/simpleverify_reward/mean": 0.5334821343421936, "rewards/simpleverify_reward/std": 0.49915629625320435, "step": 451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3457.0, "completions/mean_length": 601.896240234375, "completions/mean_terminated_length": 534.3196411132812, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.042186561200263664, "grad_norm": 0.12210560590028763, "learning_rate": 2e-07, "loss": 0.0101, "num_tokens": 288201905.0, "reward": 0.559151828289032, "reward_std": 0.19639632105827332, "rewards/simpleverify_reward/mean": 0.5591517686843872, "rewards/simpleverify_reward/std": 0.496766060590744, "step": 452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 633.9285888671875, "completions/mean_terminated_length": 586.93212890625, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.04227989430026425, "grad_norm": 0.10684733092784882, "learning_rate": 2e-07, "loss": 0.0106, "num_tokens": 288862001.0, "reward": 0.5267857313156128, "reward_std": 0.17160965502262115, "rewards/simpleverify_reward/mean": 0.5267857313156128, "rewards/simpleverify_reward/std": 0.4995608627796173, "step": 453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3786.0, "completions/mean_length": 648.9721069335938, "completions/mean_terminated_length": 594.2573852539062, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.042373227400264835, "grad_norm": 0.12619613111019135, "learning_rate": 2e-07, "loss": 0.0349, "num_tokens": 289534472.0, "reward": 0.5703125, "reward_std": 0.21391591429710388, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3096.0, "completions/mean_length": 627.8326416015625, "completions/mean_terminated_length": 564.7749633789062, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.04246656050026541, "grad_norm": 0.1102689579129219, "learning_rate": 2e-07, "loss": 0.0155, "num_tokens": 290185618.0, "reward": 0.5915178656578064, "reward_std": 0.1733780950307846, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3557.0, "completions/mean_length": 645.3426513671875, "completions/mean_terminated_length": 574.6002807617188, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.042559893600266, "grad_norm": 0.11666269600391388, "learning_rate": 2e-07, "loss": 0.0212, "num_tokens": 290856685.0, "reward": 0.5613839626312256, "reward_std": 0.19813157618045807, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3952.0, "completions/mean_length": 652.4699096679688, "completions/mean_terminated_length": 585.8713989257812, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.04265322670026658, "grad_norm": 0.09967897087335587, "learning_rate": 2e-07, "loss": 0.0271, "num_tokens": 291524914.0, "reward": 0.5636160969734192, "reward_std": 0.16619843244552612, "rewards/simpleverify_reward/mean": 0.5636160969734192, "rewards/simpleverify_reward/std": 0.49621346592903137, "step": 457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3177.0, "completions/mean_length": 610.6283569335938, "completions/mean_terminated_length": 547.2579345703125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.04274655980026717, "grad_norm": 0.11723867058753967, "learning_rate": 2e-07, "loss": 0.0452, "num_tokens": 292168245.0, "reward": 0.5457589626312256, "reward_std": 0.19730812311172485, "rewards/simpleverify_reward/mean": 0.5457589030265808, "rewards/simpleverify_reward/std": 0.4981798231601715, "step": 458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4039.0, "completions/mean_length": 607.4631958007812, "completions/mean_terminated_length": 560.1074829101562, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.04283989290026775, "grad_norm": 0.126228466629982, "learning_rate": 2e-07, "loss": 0.0274, "num_tokens": 292795660.0, "reward": 0.6104910969734192, "reward_std": 0.21563977003097534, "rewards/simpleverify_reward/mean": 0.6104910969734192, "rewards/simpleverify_reward/std": 0.48791125416755676, "step": 459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2725.0, "completions/mean_length": 606.0670166015625, "completions/mean_terminated_length": 546.6470336914062, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.04293322600026833, "grad_norm": 0.10941781848669052, "learning_rate": 2e-07, "loss": 0.0395, "num_tokens": 293424608.0, "reward": 0.629464328289032, "reward_std": 0.18408571183681488, "rewards/simpleverify_reward/mean": 0.6294642686843872, "rewards/simpleverify_reward/std": 0.4832179844379425, "step": 460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3891.0, "completions/mean_length": 626.6473388671875, "completions/mean_terminated_length": 571.5782470703125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.04302655910026892, "grad_norm": 0.12515845894813538, "learning_rate": 2e-07, "loss": 0.0238, "num_tokens": 294076428.0, "reward": 0.5479910969734192, "reward_std": 0.2198163866996765, "rewards/simpleverify_reward/mean": 0.5479910969734192, "rewards/simpleverify_reward/std": 0.49796950817108154, "step": 461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2253.0, "completions/mean_length": 606.224365234375, "completions/mean_terminated_length": 566.8363647460938, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "epoch": 0.0431198922002695, "grad_norm": 0.12299700826406479, "learning_rate": 2e-07, "loss": 0.0112, "num_tokens": 294714437.0, "reward": 0.5212053656578064, "reward_std": 0.21910029649734497, "rewards/simpleverify_reward/mean": 0.5212053656578064, "rewards/simpleverify_reward/std": 0.49982914328575134, "step": 462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3256.0, "completions/mean_length": 584.5569458007812, "completions/mean_terminated_length": 544.9243774414062, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.04321322530027008, "grad_norm": 0.12432916462421417, "learning_rate": 2e-07, "loss": 0.0323, "num_tokens": 295327720.0, "reward": 0.5970982313156128, "reward_std": 0.20249363780021667, "rewards/simpleverify_reward/mean": 0.5970982313156128, "rewards/simpleverify_reward/std": 0.4907552897930145, "step": 463 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3593.0, "completions/mean_length": 633.5892944335938, "completions/mean_terminated_length": 562.60595703125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.043306558400270666, "grad_norm": 0.1141306608915329, "learning_rate": 2e-07, "loss": 0.0507, "num_tokens": 295981344.0, "reward": 0.5546875, "reward_std": 0.19166727364063263, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2555.0, "completions/mean_length": 611.9486694335938, "completions/mean_terminated_length": 552.6288452148438, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.04339989150027125, "grad_norm": 0.12747764587402344, "learning_rate": 2e-07, "loss": 0.018, "num_tokens": 296622146.0, "reward": 0.5368303656578064, "reward_std": 0.20873039960861206, "rewards/simpleverify_reward/mean": 0.5368303656578064, "rewards/simpleverify_reward/std": 0.49892017245292664, "step": 465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4039.0, "completions/mean_length": 629.841552734375, "completions/mean_terminated_length": 574.8231201171875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.04349322460027183, "grad_norm": 0.11171989887952805, "learning_rate": 2e-07, "loss": 0.02, "num_tokens": 297288924.0, "reward": 0.5022321939468384, "reward_std": 0.17836818099021912, "rewards/simpleverify_reward/mean": 0.5022321343421936, "rewards/simpleverify_reward/std": 0.5002742409706116, "step": 466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2969.0, "completions/mean_length": 593.2433471679688, "completions/mean_terminated_length": 541.673828125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.043586557700272414, "grad_norm": 0.1212693601846695, "learning_rate": 2e-07, "loss": 0.0351, "num_tokens": 297905958.0, "reward": 0.640625, "reward_std": 0.17506878077983856, "rewards/simpleverify_reward/mean": 0.640625, "rewards/simpleverify_reward/std": 0.48008525371551514, "step": 467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3432.0, "completions/mean_length": 596.0413208007812, "completions/mean_terminated_length": 564.5101318359375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.043679890800273, "grad_norm": 0.1169850155711174, "learning_rate": 2e-07, "loss": 0.022, "num_tokens": 298529323.0, "reward": 0.5602678656578064, "reward_std": 0.20395858585834503, "rewards/simpleverify_reward/mean": 0.5602678656578064, "rewards/simpleverify_reward/std": 0.4966317117214203, "step": 468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3959.0, "completions/mean_length": 607.3404541015625, "completions/mean_terminated_length": 535.8189086914062, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.043773223900273585, "grad_norm": 0.13885031640529633, "learning_rate": 2e-07, "loss": 0.041, "num_tokens": 299160116.0, "reward": 0.5814732313156128, "reward_std": 0.22841641306877136, "rewards/simpleverify_reward/mean": 0.5814732313156128, "rewards/simpleverify_reward/std": 0.4935929775238037, "step": 469 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3372.0, "completions/mean_length": 637.9207763671875, "completions/mean_terminated_length": 546.814453125, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.04386655700027416, "grad_norm": 0.12226605415344238, "learning_rate": 2e-07, "loss": 0.0497, "num_tokens": 299817205.0, "reward": 0.5636160969734192, "reward_std": 0.2260863482952118, "rewards/simpleverify_reward/mean": 0.5636160969734192, "rewards/simpleverify_reward/std": 0.49621346592903137, "step": 470 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2271.0, "completions/mean_length": 639.747802734375, "completions/mean_terminated_length": 572.9032592773438, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.04395989010027475, "grad_norm": 0.12332125753164291, "learning_rate": 2e-07, "loss": 0.0169, "num_tokens": 300478755.0, "reward": 0.5580357313156128, "reward_std": 0.23383155465126038, "rewards/simpleverify_reward/mean": 0.5580357313156128, "rewards/simpleverify_reward/std": 0.49689781665802, "step": 471 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2041.0, "completions/mean_length": 584.8817138671875, "completions/mean_terminated_length": 533.1890869140625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.044053223200275334, "grad_norm": 0.12418516725301743, "learning_rate": 2e-07, "loss": 0.0238, "num_tokens": 301086529.0, "reward": 0.5837053656578064, "reward_std": 0.19050472974777222, "rewards/simpleverify_reward/mean": 0.5837053656578064, "rewards/simpleverify_reward/std": 0.49321895837783813, "step": 472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3987.0, "completions/mean_length": 584.8616333007812, "completions/mean_terminated_length": 533.168701171875, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.04414655630027592, "grad_norm": 0.12116127461194992, "learning_rate": 2e-07, "loss": 0.023, "num_tokens": 301706429.0, "reward": 0.598214328289032, "reward_std": 0.21508300304412842, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053287506103516, "step": 473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3503.0, "completions/mean_length": 620.3683471679688, "completions/mean_terminated_length": 520.6084594726562, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.0442398894002765, "grad_norm": 0.11500445008277893, "learning_rate": 2e-07, "loss": 0.0387, "num_tokens": 302351311.0, "reward": 0.5602678656578064, "reward_std": 0.17592570185661316, "rewards/simpleverify_reward/mean": 0.5602678656578064, "rewards/simpleverify_reward/std": 0.4966317415237427, "step": 474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2274.0, "completions/mean_length": 605.0357666015625, "completions/mean_terminated_length": 549.6235961914062, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.04433322250027708, "grad_norm": 0.12059532850980759, "learning_rate": 2e-07, "loss": 0.0219, "num_tokens": 302984815.0, "reward": 0.5491071939468384, "reward_std": 0.19828453660011292, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3793.0, "completions/mean_length": 637.7623291015625, "completions/mean_terminated_length": 602.6730346679688, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.04442655560027767, "grad_norm": 0.12264588475227356, "learning_rate": 2e-07, "loss": 0.0333, "num_tokens": 303649202.0, "reward": 0.5479910969734192, "reward_std": 0.23814833164215088, "rewards/simpleverify_reward/mean": 0.5479910969734192, "rewards/simpleverify_reward/std": 0.49796950817108154, "step": 476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2646.0, "completions/mean_length": 661.4777221679688, "completions/mean_terminated_length": 579.0491333007812, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.04451988870027825, "grad_norm": 0.11240037530660629, "learning_rate": 2e-07, "loss": 0.0235, "num_tokens": 304329134.0, "reward": 0.551339328289032, "reward_std": 0.18870487809181213, "rewards/simpleverify_reward/mean": 0.5513392686843872, "rewards/simpleverify_reward/std": 0.4976350665092468, "step": 477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4007.0, "completions/mean_length": 593.8326416015625, "completions/mean_terminated_length": 526.10009765625, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.04461322180027883, "grad_norm": 0.11945917457342148, "learning_rate": 2e-07, "loss": 0.0289, "num_tokens": 304955640.0, "reward": 0.606026828289032, "reward_std": 0.18723993003368378, "rewards/simpleverify_reward/mean": 0.6060267686843872, "rewards/simpleverify_reward/std": 0.48890191316604614, "step": 478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3869.0, "completions/mean_length": 585.974365234375, "completions/mean_terminated_length": 542.346923828125, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.044706554900279416, "grad_norm": 0.13876935839653015, "learning_rate": 2e-07, "loss": 0.0392, "num_tokens": 305559665.0, "reward": 0.5758928656578064, "reward_std": 0.23849529027938843, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49448272585868835, "step": 479 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2669.0, "completions/mean_length": 621.1194458007812, "completions/mean_terminated_length": 549.8804321289062, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.04479988800028, "grad_norm": 0.1126650720834732, "learning_rate": 2e-07, "loss": 0.0477, "num_tokens": 306208428.0, "reward": 0.5558035969734192, "reward_std": 0.1788235306739807, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715372920036316, "step": 480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3756.0, "completions/mean_length": 628.8817138671875, "completions/mean_terminated_length": 593.7023315429688, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.04489322110028058, "grad_norm": 0.12478847056627274, "learning_rate": 2e-07, "loss": 0.0276, "num_tokens": 306861922.0, "reward": 0.551339328289032, "reward_std": 0.20237527787685394, "rewards/simpleverify_reward/mean": 0.5513392686843872, "rewards/simpleverify_reward/std": 0.4976350665092468, "step": 481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3565.0, "completions/mean_length": 602.3705444335938, "completions/mean_terminated_length": 562.9390869140625, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.044986554200281165, "grad_norm": 0.13599878549575806, "learning_rate": 2e-07, "loss": 0.0279, "num_tokens": 307496102.0, "reward": 0.5970982313156128, "reward_std": 0.23743335902690887, "rewards/simpleverify_reward/mean": 0.5970982313156128, "rewards/simpleverify_reward/std": 0.49075525999069214, "step": 482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3536.0, "completions/mean_length": 608.9029541015625, "completions/mean_terminated_length": 581.4454956054688, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.04507988730028175, "grad_norm": 0.11717932671308517, "learning_rate": 2e-07, "loss": 0.0292, "num_tokens": 308123135.0, "reward": 0.5290178656578064, "reward_std": 0.20094947516918182, "rewards/simpleverify_reward/mean": 0.5290178656578064, "rewards/simpleverify_reward/std": 0.49943602085113525, "step": 483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3867.0, "completions/mean_length": 599.4285888671875, "completions/mean_terminated_length": 547.9501342773438, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.045173220400282335, "grad_norm": 0.10927430540323257, "learning_rate": 2e-07, "loss": 0.0391, "num_tokens": 308752991.0, "reward": 0.6361607313156128, "reward_std": 0.16453665494918823, "rewards/simpleverify_reward/mean": 0.6361607313156128, "rewards/simpleverify_reward/std": 0.4813718795776367, "step": 484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3511.0, "completions/mean_length": 592.3850708007812, "completions/mean_terminated_length": 544.82470703125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.04526655350028291, "grad_norm": 0.12929199635982513, "learning_rate": 2e-07, "loss": 0.0488, "num_tokens": 309370264.0, "reward": 0.5948660969734192, "reward_std": 0.2361188381910324, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.49119213223457336, "step": 485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 1875.0, "completions/mean_length": 593.1529541015625, "completions/mean_terminated_length": 549.6146850585938, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.0453598866002835, "grad_norm": 0.13190048933029175, "learning_rate": 2e-07, "loss": 0.0306, "num_tokens": 309992817.0, "reward": 0.5569196939468384, "reward_std": 0.2215081751346588, "rewards/simpleverify_reward/mean": 0.5569196343421936, "rewards/simpleverify_reward/std": 0.4970270097255707, "step": 486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3743.0, "completions/mean_length": 588.5692138671875, "completions/mean_terminated_length": 560.95166015625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.045453219700284084, "grad_norm": 0.11333946883678436, "learning_rate": 2e-07, "loss": 0.0057, "num_tokens": 310608503.0, "reward": 0.5491071939468384, "reward_std": 0.17836566269397736, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2287.0, "completions/mean_length": 631.8370971679688, "completions/mean_terminated_length": 548.6971435546875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.04554655280028467, "grad_norm": 0.11566179245710373, "learning_rate": 2e-07, "loss": 0.0448, "num_tokens": 311257877.0, "reward": 0.578125, "reward_std": 0.18144892156124115, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3526.0, "completions/mean_length": 646.7522583007812, "completions/mean_terminated_length": 563.9702758789062, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.04563988590028525, "grad_norm": 0.11878056079149246, "learning_rate": 2e-07, "loss": 0.027, "num_tokens": 311931847.0, "reward": 0.578125, "reward_std": 0.20399925112724304, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2904.0, "completions/mean_length": 637.6004638671875, "completions/mean_terminated_length": 598.5665893554688, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.04573321900028583, "grad_norm": 0.13192987442016602, "learning_rate": 2e-07, "loss": 0.0368, "num_tokens": 312587449.0, "reward": 0.494419664144516, "reward_std": 0.222820445895195, "rewards/simpleverify_reward/mean": 0.4944196343421936, "rewards/simpleverify_reward/std": 0.5002480745315552, "step": 490 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3323.0, "completions/mean_length": 626.2467041015625, "completions/mean_terminated_length": 567.1702880859375, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.04582655210028642, "grad_norm": 0.133034810423851, "learning_rate": 2e-07, "loss": 0.0273, "num_tokens": 313231638.0, "reward": 0.5703125, "reward_std": 0.22642266750335693, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 491 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3126.0, "completions/mean_length": 581.2645263671875, "completions/mean_terminated_length": 533.5531616210938, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.045919885200286996, "grad_norm": 0.13088074326515198, "learning_rate": 2e-07, "loss": 0.0386, "num_tokens": 313843267.0, "reward": 0.5859375, "reward_std": 0.2192857563495636, "rewards/simpleverify_reward/mean": 0.5859375, "rewards/simpleverify_reward/std": 0.4928344786167145, "step": 492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2498.0, "completions/mean_length": 648.2980346679688, "completions/mean_terminated_length": 577.6162109375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.04601321830028758, "grad_norm": 0.10998985171318054, "learning_rate": 2e-07, "loss": 0.0454, "num_tokens": 314522214.0, "reward": 0.5691964626312256, "reward_std": 0.19121263921260834, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652786254883, "step": 493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3096.0, "completions/mean_length": 623.2265625, "completions/mean_terminated_length": 560.085205078125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.046106551400288166, "grad_norm": 0.11237216740846634, "learning_rate": 2e-07, "loss": 0.0342, "num_tokens": 315177441.0, "reward": 0.5022321939468384, "reward_std": 0.1871650665998459, "rewards/simpleverify_reward/mean": 0.5022321343421936, "rewards/simpleverify_reward/std": 0.5002743005752563, "step": 494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2411.0, "completions/mean_length": 624.1428833007812, "completions/mean_terminated_length": 569.0339965820312, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.04619988450028875, "grad_norm": 0.1132131889462471, "learning_rate": 2e-07, "loss": 0.0231, "num_tokens": 315832553.0, "reward": 0.5546875, "reward_std": 0.1880227029323578, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3215.0, "completions/mean_length": 598.8058471679688, "completions/mean_terminated_length": 543.2947998046875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.04629321760028933, "grad_norm": 0.1307394653558731, "learning_rate": 2e-07, "loss": 0.0333, "num_tokens": 316458259.0, "reward": 0.551339328289032, "reward_std": 0.215717151761055, "rewards/simpleverify_reward/mean": 0.5513392686843872, "rewards/simpleverify_reward/std": 0.4976350665092468, "step": 496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3833.0, "completions/mean_length": 630.6808471679688, "completions/mean_terminated_length": 555.60546875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.046386550700289915, "grad_norm": 0.12283704429864883, "learning_rate": 2e-07, "loss": 0.0165, "num_tokens": 317116533.0, "reward": 0.543526828289032, "reward_std": 0.20967289805412292, "rewards/simpleverify_reward/mean": 0.5435267686843872, "rewards/simpleverify_reward/std": 0.49838000535964966, "step": 497 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2363.0, "completions/mean_length": 648.372802734375, "completions/mean_terminated_length": 593.6485595703125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.0464798838002905, "grad_norm": 0.11293631047010422, "learning_rate": 2e-07, "loss": 0.0181, "num_tokens": 317783315.0, "reward": 0.5691964626312256, "reward_std": 0.20421679317951202, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652488231659, "step": 498 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3224.0, "completions/mean_length": 605.0089721679688, "completions/mean_terminated_length": 561.6181030273438, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.046573216900291085, "grad_norm": 0.132080078125, "learning_rate": 2e-07, "loss": 0.0375, "num_tokens": 318412763.0, "reward": 0.5636160969734192, "reward_std": 0.20467397570610046, "rewards/simpleverify_reward/mean": 0.5636160969734192, "rewards/simpleverify_reward/std": 0.49621346592903137, "step": 499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3818.0, "completions/mean_length": 649.4710083007812, "completions/mean_terminated_length": 578.813232421875, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.046666550000291664, "grad_norm": 0.12655441462993622, "learning_rate": 2e-07, "loss": 0.0557, "num_tokens": 319081369.0, "reward": 0.59375, "reward_std": 0.21365557610988617, "rewards/simpleverify_reward/mean": 0.59375, "rewards/simpleverify_reward/std": 0.4914066195487976, "step": 500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3236.0, "completions/mean_length": 679.575927734375, "completions/mean_terminated_length": 593.5789184570312, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.04675988310029225, "grad_norm": 0.11210490018129349, "learning_rate": 2e-07, "loss": 0.0337, "num_tokens": 319771325.0, "reward": 0.6071428656578064, "reward_std": 0.2117016762495041, "rewards/simpleverify_reward/mean": 0.6071428656578064, "rewards/simpleverify_reward/std": 0.48865824937820435, "step": 501 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3085.0, "completions/mean_length": 584.6685791015625, "completions/mean_terminated_length": 553.034912109375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.046853216200292834, "grad_norm": 0.11570018529891968, "learning_rate": 2e-07, "loss": 0.0138, "num_tokens": 320376060.0, "reward": 0.5870535969734192, "reward_std": 0.177769735455513, "rewards/simpleverify_reward/mean": 0.5870535969734192, "rewards/simpleverify_reward/std": 0.49263837933540344, "step": 502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3907.0, "completions/mean_length": 637.1864013671875, "completions/mean_terminated_length": 562.251953125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.04694654930029342, "grad_norm": 0.130585178732872, "learning_rate": 2e-07, "loss": 0.0253, "num_tokens": 321032963.0, "reward": 0.582589328289032, "reward_std": 0.23665127158164978, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.4934072494506836, "step": 503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3020.0, "completions/mean_length": 597.1864013671875, "completions/mean_terminated_length": 561.6854248046875, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.047039882400294, "grad_norm": 0.1112976148724556, "learning_rate": 2e-07, "loss": 0.004, "num_tokens": 321657330.0, "reward": 0.5524553656578064, "reward_std": 0.15654462575912476, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3159.0, "completions/mean_length": 624.2332763671875, "completions/mean_terminated_length": 573.1200561523438, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.04713321550029458, "grad_norm": 0.1249091699719429, "learning_rate": 2e-07, "loss": 0.0217, "num_tokens": 322309043.0, "reward": 0.59375, "reward_std": 0.2183835208415985, "rewards/simpleverify_reward/mean": 0.59375, "rewards/simpleverify_reward/std": 0.4914066195487976, "step": 505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3659.0, "completions/mean_length": 594.3125, "completions/mean_terminated_length": 530.6454467773438, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.04722654860029517, "grad_norm": 0.11771149188280106, "learning_rate": 2e-07, "loss": 0.0286, "num_tokens": 322929131.0, "reward": 0.5926339626312256, "reward_std": 0.16773685812950134, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3431.0, "completions/mean_length": 668.4342041015625, "completions/mean_terminated_length": 598.1651611328125, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.047319881700295746, "grad_norm": 0.12119530886411667, "learning_rate": 2e-07, "loss": 0.0311, "num_tokens": 323608184.0, "reward": 0.5602678656578064, "reward_std": 0.23383015394210815, "rewards/simpleverify_reward/mean": 0.5602678656578064, "rewards/simpleverify_reward/std": 0.4966317415237427, "step": 507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3683.0, "completions/mean_length": 639.2522583007812, "completions/mean_terminated_length": 580.3972778320312, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.04741321480029633, "grad_norm": 0.13020774722099304, "learning_rate": 2e-07, "loss": 0.0262, "num_tokens": 324294130.0, "reward": 0.5022321939468384, "reward_std": 0.23502619564533234, "rewards/simpleverify_reward/mean": 0.5022321343421936, "rewards/simpleverify_reward/std": 0.5002742409706116, "step": 508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2092.0, "completions/mean_length": 555.3460083007812, "completions/mean_terminated_length": 519.4204711914062, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.04750654790029692, "grad_norm": 0.12101134657859802, "learning_rate": 2e-07, "loss": 0.0236, "num_tokens": 324879824.0, "reward": 0.6238839626312256, "reward_std": 0.16160885989665985, "rewards/simpleverify_reward/mean": 0.6238839030265808, "rewards/simpleverify_reward/std": 0.4846802353858948, "step": 509 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3329.0, "completions/mean_length": 599.0636596679688, "completions/mean_terminated_length": 555.598876953125, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.0475998810002975, "grad_norm": 0.1180044412612915, "learning_rate": 2e-07, "loss": 0.0228, "num_tokens": 325511921.0, "reward": 0.5647321939468384, "reward_std": 0.18464843928813934, "rewards/simpleverify_reward/mean": 0.5647321343421936, "rewards/simpleverify_reward/std": 0.49606895446777344, "step": 510 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2910.0, "completions/mean_length": 570.9207763671875, "completions/mean_terminated_length": 555.11328125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.04769321410029808, "grad_norm": 0.1272277981042862, "learning_rate": 2e-07, "loss": 0.0183, "num_tokens": 326113730.0, "reward": 0.5703125, "reward_std": 0.1834430992603302, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 634.0435791015625, "completions/mean_terminated_length": 579.0918579101562, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "epoch": 0.047786547200298665, "grad_norm": 0.1197831779718399, "learning_rate": 2e-07, "loss": 0.0269, "num_tokens": 326759865.0, "reward": 0.5524553656578064, "reward_std": 0.19782987236976624, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 1896.0, "completions/mean_length": 580.3638916015625, "completions/mean_terminated_length": 536.6666870117188, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.04787988030029925, "grad_norm": 0.12518839538097382, "learning_rate": 2e-07, "loss": 0.0069, "num_tokens": 327370095.0, "reward": 0.582589328289032, "reward_std": 0.1899847835302353, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.493407279253006, "step": 513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3768.0, "completions/mean_length": 599.0178833007812, "completions/mean_terminated_length": 535.4363403320312, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.047973213400299836, "grad_norm": 0.1252635270357132, "learning_rate": 2e-07, "loss": 0.0182, "num_tokens": 328002303.0, "reward": 0.5658482313156128, "reward_std": 0.17517824470996857, "rewards/simpleverify_reward/mean": 0.5658482313156128, "rewards/simpleverify_reward/std": 0.49592188000679016, "step": 514 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2570.0, "completions/mean_length": 636.091552734375, "completions/mean_terminated_length": 593.0870361328125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.048066546500300414, "grad_norm": 0.11797919869422913, "learning_rate": 2e-07, "loss": 0.0382, "num_tokens": 328653169.0, "reward": 0.5736607313156128, "reward_std": 0.201775461435318, "rewards/simpleverify_reward/mean": 0.5736607313156128, "rewards/simpleverify_reward/std": 0.4948205351829529, "step": 515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3274.0, "completions/mean_length": 590.0167846679688, "completions/mean_terminated_length": 566.3809204101562, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "epoch": 0.048159879600301, "grad_norm": 0.11856833845376968, "learning_rate": 2e-07, "loss": 0.0271, "num_tokens": 329270000.0, "reward": 0.5546875, "reward_std": 0.17856179177761078, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3574.0, "completions/mean_length": 675.1685791015625, "completions/mean_terminated_length": 605.03759765625, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 0.048253212700301584, "grad_norm": 0.10941430181264877, "learning_rate": 2e-07, "loss": 0.0183, "num_tokens": 329977671.0, "reward": 0.5078125, "reward_std": 0.18709047138690948, "rewards/simpleverify_reward/mean": 0.5078125, "rewards/simpleverify_reward/std": 0.5002182126045227, "step": 517 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3972.0, "completions/mean_length": 619.4944458007812, "completions/mean_terminated_length": 580.2562255859375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.04834654580030216, "grad_norm": 0.12357570976018906, "learning_rate": 2e-07, "loss": 0.0389, "num_tokens": 330628018.0, "reward": 0.5401785969734192, "reward_std": 0.22338244318962097, "rewards/simpleverify_reward/mean": 0.5401785969734192, "rewards/simpleverify_reward/std": 0.49866142868995667, "step": 518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2787.0, "completions/mean_length": 578.2076416015625, "completions/mean_terminated_length": 534.483642578125, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.04843987890030275, "grad_norm": 0.11210503429174423, "learning_rate": 2e-07, "loss": 0.015, "num_tokens": 331231700.0, "reward": 0.5580357313156128, "reward_std": 0.17156507074832916, "rewards/simpleverify_reward/mean": 0.5580357313156128, "rewards/simpleverify_reward/std": 0.49689781665802, "step": 519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4042.0, "completions/mean_length": 624.591552734375, "completions/mean_terminated_length": 573.4835815429688, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.04853321200030333, "grad_norm": 0.12524333596229553, "learning_rate": 2e-07, "loss": 0.0364, "num_tokens": 331881366.0, "reward": 0.5859375, "reward_std": 0.22094543278217316, "rewards/simpleverify_reward/mean": 0.5859375, "rewards/simpleverify_reward/std": 0.4928344786167145, "step": 520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3575.0, "completions/mean_length": 615.5614013671875, "completions/mean_terminated_length": 564.3204956054688, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.04862654510030392, "grad_norm": 0.1329009234905243, "learning_rate": 2e-07, "loss": 0.0296, "num_tokens": 332513389.0, "reward": 0.5870535969734192, "reward_std": 0.26712271571159363, "rewards/simpleverify_reward/mean": 0.5870535969734192, "rewards/simpleverify_reward/std": 0.49263837933540344, "step": 521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2690.0, "completions/mean_length": 675.6160888671875, "completions/mean_terminated_length": 601.5142211914062, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.048719878200304496, "grad_norm": 0.10639889538288116, "learning_rate": 2e-07, "loss": 0.0417, "num_tokens": 333214565.0, "reward": 0.5580357313156128, "reward_std": 0.20868511497974396, "rewards/simpleverify_reward/mean": 0.5580357313156128, "rewards/simpleverify_reward/std": 0.49689778685569763, "step": 522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3895.0, "completions/mean_length": 679.6239013671875, "completions/mean_terminated_length": 593.6281127929688, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.04881321130030508, "grad_norm": 0.1092965230345726, "learning_rate": 2e-07, "loss": 0.0238, "num_tokens": 333915372.0, "reward": 0.5379464626312256, "reward_std": 0.19554008543491364, "rewards/simpleverify_reward/mean": 0.5379464030265808, "rewards/simpleverify_reward/std": 0.4988364279270172, "step": 523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3461.0, "completions/mean_length": 685.0714721679688, "completions/mean_terminated_length": 599.2127685546875, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.04890654440030567, "grad_norm": 0.11850214749574661, "learning_rate": 2e-07, "loss": 0.0523, "num_tokens": 334615092.0, "reward": 0.515625, "reward_std": 0.2358585149049759, "rewards/simpleverify_reward/mean": 0.515625, "rewards/simpleverify_reward/std": 0.5000349283218384, "step": 524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3960.0, "completions/mean_length": 594.685302734375, "completions/mean_terminated_length": 578.9843139648438, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.04899987750030625, "grad_norm": 0.11345477402210236, "learning_rate": 2e-07, "loss": 0.0129, "num_tokens": 335236626.0, "reward": 0.5714285969734192, "reward_std": 0.1625838577747345, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514803290367126, "step": 525 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3332.0, "completions/mean_length": 612.6741333007812, "completions/mean_terminated_length": 549.3408813476562, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.04909321060030683, "grad_norm": 0.11742356419563293, "learning_rate": 2e-07, "loss": 0.0321, "num_tokens": 335873318.0, "reward": 0.5848214626312256, "reward_std": 0.1854286938905716, "rewards/simpleverify_reward/mean": 0.5848214030265808, "rewards/simpleverify_reward/std": 0.49302801489830017, "step": 526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3326.0, "completions/mean_length": 619.0904541015625, "completions/mean_terminated_length": 567.9014282226562, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.049186543700307415, "grad_norm": 0.12560254335403442, "learning_rate": 2e-07, "loss": 0.0249, "num_tokens": 336522935.0, "reward": 0.5446428656578064, "reward_std": 0.21121451258659363, "rewards/simpleverify_reward/mean": 0.5446428656578064, "rewards/simpleverify_reward/std": 0.49828118085861206, "step": 527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3407.0, "completions/mean_length": 606.0324096679688, "completions/mean_terminated_length": 562.6542358398438, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.049279876800308, "grad_norm": 0.12257523834705353, "learning_rate": 2e-07, "loss": 0.0209, "num_tokens": 337157972.0, "reward": 0.5837053656578064, "reward_std": 0.22117073833942413, "rewards/simpleverify_reward/mean": 0.5837053656578064, "rewards/simpleverify_reward/std": 0.49321895837783813, "step": 528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4039.0, "completions/mean_length": 619.5502319335938, "completions/mean_terminated_length": 556.342041015625, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.049373209900308586, "grad_norm": 0.12803836166858673, "learning_rate": 2e-07, "loss": 0.0339, "num_tokens": 337791881.0, "reward": 0.5491071939468384, "reward_std": 0.23195292055606842, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3974.0, "completions/mean_length": 608.4308471679688, "completions/mean_terminated_length": 557.0848999023438, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.049466543000309164, "grad_norm": 0.11878123134374619, "learning_rate": 2e-07, "loss": 0.0146, "num_tokens": 338426611.0, "reward": 0.6261160969734192, "reward_std": 0.22059917449951172, "rewards/simpleverify_reward/mean": 0.6261160969734192, "rewards/simpleverify_reward/std": 0.48410359025001526, "step": 530 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4019.0, "completions/mean_length": 546.302490234375, "completions/mean_terminated_length": 530.3845825195312, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.04955987610030975, "grad_norm": 0.12894637882709503, "learning_rate": 2e-07, "loss": 0.0341, "num_tokens": 338997322.0, "reward": 0.6026785969734192, "reward_std": 0.17829149961471558, "rewards/simpleverify_reward/mean": 0.6026785969734192, "rewards/simpleverify_reward/std": 0.48961687088012695, "step": 531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3797.0, "completions/mean_length": 692.0156860351562, "completions/mean_terminated_length": 622.2301025390625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.049653209200310335, "grad_norm": 0.11384526640176773, "learning_rate": 2e-07, "loss": 0.0163, "num_tokens": 339704824.0, "reward": 0.5234375, "reward_std": 0.2183828055858612, "rewards/simpleverify_reward/mean": 0.5234375, "rewards/simpleverify_reward/std": 0.49972933530807495, "step": 532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4020.0, "completions/mean_length": 540.8984375, "completions/mean_terminated_length": 512.905517578125, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.04974654230031091, "grad_norm": 0.14210230112075806, "learning_rate": 2e-07, "loss": 0.0215, "num_tokens": 340276341.0, "reward": 0.590401828289032, "reward_std": 0.2078631967306137, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3600.0, "completions/mean_length": 586.0022583007812, "completions/mean_terminated_length": 530.2879638671875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.0498398754003115, "grad_norm": 0.127777099609375, "learning_rate": 2e-07, "loss": 0.0189, "num_tokens": 340884095.0, "reward": 0.5301339626312256, "reward_std": 0.18442019820213318, "rewards/simpleverify_reward/mean": 0.5301339030265808, "rewards/simpleverify_reward/std": 0.49936985969543457, "step": 534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3196.0, "completions/mean_length": 632.5592041015625, "completions/mean_terminated_length": 561.5546875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.04993320850031208, "grad_norm": 0.11997145414352417, "learning_rate": 2e-07, "loss": 0.0163, "num_tokens": 341531236.0, "reward": 0.5602678656578064, "reward_std": 0.21286283433437347, "rewards/simpleverify_reward/mean": 0.5602678656578064, "rewards/simpleverify_reward/std": 0.4966317117214203, "step": 535 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3713.0, "completions/mean_length": 606.3381958007812, "completions/mean_terminated_length": 574.8997802734375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.05002654160031267, "grad_norm": 0.1221434473991394, "learning_rate": 2e-07, "loss": 0.0297, "num_tokens": 342163323.0, "reward": 0.5345982313156128, "reward_std": 0.17623315751552582, "rewards/simpleverify_reward/mean": 0.5345982313156128, "rewards/simpleverify_reward/std": 0.4990801215171814, "step": 536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2058.0, "completions/mean_length": 603.9107666015625, "completions/mean_terminated_length": 576.4140014648438, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.05011987470031325, "grad_norm": 0.12531979382038116, "learning_rate": 2e-07, "loss": 0.0261, "num_tokens": 342791667.0, "reward": 0.5770089626312256, "reward_std": 0.23855990171432495, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099319934845, "step": 537 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2666.0, "completions/mean_length": 568.8326416015625, "completions/mean_terminated_length": 545.053955078125, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.05021320780031383, "grad_norm": 0.12157143652439117, "learning_rate": 2e-07, "loss": 0.0135, "num_tokens": 343391053.0, "reward": 0.582589328289032, "reward_std": 0.1982831209897995, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.493407279253006, "step": 538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3539.0, "completions/mean_length": 697.7656860351562, "completions/mean_terminated_length": 616.2080078125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.05030654090031442, "grad_norm": 0.11397805064916611, "learning_rate": 2e-07, "loss": 0.0364, "num_tokens": 344108195.0, "reward": 0.5245535969734192, "reward_std": 0.2296217381954193, "rewards/simpleverify_reward/mean": 0.5245535969734192, "rewards/simpleverify_reward/std": 0.4996756613254547, "step": 539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4084.0, "completions/mean_length": 659.5692138671875, "completions/mean_terminated_length": 564.988525390625, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.050399874000315, "grad_norm": 0.12903417646884918, "learning_rate": 2e-07, "loss": 0.0419, "num_tokens": 344795297.0, "reward": 0.5, "reward_std": 0.2292751669883728, "rewards/simpleverify_reward/mean": 0.5, "rewards/simpleverify_reward/std": 0.5002792477607727, "step": 540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2098.0, "completions/mean_length": 561.458740234375, "completions/mean_terminated_length": 537.63037109375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.05049320710031558, "grad_norm": 0.12681055068969727, "learning_rate": 2e-07, "loss": 0.0261, "num_tokens": 345386012.0, "reward": 0.5848214626312256, "reward_std": 0.2004634588956833, "rewards/simpleverify_reward/mean": 0.5848214030265808, "rewards/simpleverify_reward/std": 0.49302801489830017, "step": 541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3070.0, "completions/mean_length": 606.8839721679688, "completions/mean_terminated_length": 547.4779052734375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.050586540200316166, "grad_norm": 0.11643163859844208, "learning_rate": 2e-07, "loss": 0.0126, "num_tokens": 346021652.0, "reward": 0.5613839626312256, "reward_std": 0.178443044424057, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2721.0, "completions/mean_length": 561.607177734375, "completions/mean_terminated_length": 541.7733154296875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.05067987330031675, "grad_norm": 0.11725421249866486, "learning_rate": 2e-07, "loss": 0.0176, "num_tokens": 346621860.0, "reward": 0.6071428656578064, "reward_std": 0.1692018061876297, "rewards/simpleverify_reward/mean": 0.6071428656578064, "rewards/simpleverify_reward/std": 0.48865827918052673, "step": 543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3936.0, "completions/mean_length": 614.4642944335938, "completions/mean_terminated_length": 526.828369140625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.05077320640031733, "grad_norm": 0.12071819603443146, "learning_rate": 2e-07, "loss": 0.0431, "num_tokens": 347259428.0, "reward": 0.652901828289032, "reward_std": 0.18039585649967194, "rewards/simpleverify_reward/mean": 0.6529017686843872, "rewards/simpleverify_reward/std": 0.47631317377090454, "step": 544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2533.0, "completions/mean_length": 641.7455444335938, "completions/mean_terminated_length": 558.8434448242188, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.050866539500317914, "grad_norm": 0.10964108258485794, "learning_rate": 2e-07, "loss": 0.0162, "num_tokens": 347928232.0, "reward": 0.5926339626312256, "reward_std": 0.19102789461612701, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3397.0, "completions/mean_length": 623.0546875, "completions/mean_terminated_length": 579.8881225585938, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.0509598726003185, "grad_norm": 0.12145282328128815, "learning_rate": 2e-07, "loss": 0.0104, "num_tokens": 348579121.0, "reward": 0.559151828289032, "reward_std": 0.2023439109325409, "rewards/simpleverify_reward/mean": 0.5591517686843872, "rewards/simpleverify_reward/std": 0.496766060590744, "step": 546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2467.0, "completions/mean_length": 643.6663208007812, "completions/mean_terminated_length": 596.8020629882812, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.051053205700319085, "grad_norm": 0.12508799135684967, "learning_rate": 2e-07, "loss": 0.0378, "num_tokens": 349253054.0, "reward": 0.478794664144516, "reward_std": 0.23909051716327667, "rewards/simpleverify_reward/mean": 0.4787946343421936, "rewards/simpleverify_reward/std": 0.49982911348342896, "step": 547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2196.0, "completions/mean_length": 629.3984375, "completions/mean_terminated_length": 562.3538208007812, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.05114653880031966, "grad_norm": 0.11566802859306335, "learning_rate": 2e-07, "loss": 0.0184, "num_tokens": 349902467.0, "reward": 0.5602678656578064, "reward_std": 0.193696066737175, "rewards/simpleverify_reward/mean": 0.5602678656578064, "rewards/simpleverify_reward/std": 0.4966317415237427, "step": 548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2064.0, "completions/mean_length": 596.404052734375, "completions/mean_terminated_length": 556.9052124023438, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.05123987190032025, "grad_norm": 0.12139381468296051, "learning_rate": 2e-07, "loss": 0.0278, "num_tokens": 350519781.0, "reward": 0.598214328289032, "reward_std": 0.2144344449043274, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053287506103516, "step": 549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2617.0, "completions/mean_length": 620.0234375, "completions/mean_terminated_length": 560.8411254882812, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.05133320500032083, "grad_norm": 0.1131553202867508, "learning_rate": 2e-07, "loss": 0.0238, "num_tokens": 351157082.0, "reward": 0.5379464626312256, "reward_std": 0.18888893723487854, "rewards/simpleverify_reward/mean": 0.5379464030265808, "rewards/simpleverify_reward/std": 0.4988364279270172, "step": 550 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3725.0, "completions/mean_length": 673.2064819335938, "completions/mean_terminated_length": 579.0010986328125, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.05142653810032142, "grad_norm": 0.12076769024133682, "learning_rate": 2e-07, "loss": 0.0289, "num_tokens": 351852971.0, "reward": 0.5267857313156128, "reward_std": 0.20347748696804047, "rewards/simpleverify_reward/mean": 0.5267857313156128, "rewards/simpleverify_reward/std": 0.4995608627796173, "step": 551 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3134.0, "completions/mean_length": 606.1908569335938, "completions/mean_terminated_length": 566.802490234375, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.051519871200322, "grad_norm": 0.11417798697948456, "learning_rate": 2e-07, "loss": 0.0149, "num_tokens": 352482230.0, "reward": 0.543526828289032, "reward_std": 0.18550579249858856, "rewards/simpleverify_reward/mean": 0.5435267686843872, "rewards/simpleverify_reward/std": 0.49838003516197205, "step": 552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3862.0, "completions/mean_length": 652.1339721679688, "completions/mean_terminated_length": 573.5068359375, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.05161320430032258, "grad_norm": 0.12550392746925354, "learning_rate": 2e-07, "loss": 0.0383, "num_tokens": 353158934.0, "reward": 0.5870535969734192, "reward_std": 0.20639853179454803, "rewards/simpleverify_reward/mean": 0.5870535969734192, "rewards/simpleverify_reward/std": 0.49263834953308105, "step": 553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3541.0, "completions/mean_length": 578.107177734375, "completions/mean_terminated_length": 538.4017944335938, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.05170653740032317, "grad_norm": 0.11646903306245804, "learning_rate": 2e-07, "loss": 0.0206, "num_tokens": 353762526.0, "reward": 0.5658482313156128, "reward_std": 0.19061487913131714, "rewards/simpleverify_reward/mean": 0.5658482313156128, "rewards/simpleverify_reward/std": 0.49592188000679016, "step": 554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3957.0, "completions/mean_length": 613.1640625, "completions/mean_terminated_length": 565.8857421875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.05179987050032375, "grad_norm": 0.11878780275583267, "learning_rate": 2e-07, "loss": 0.0173, "num_tokens": 354402929.0, "reward": 0.5446428656578064, "reward_std": 0.20451989769935608, "rewards/simpleverify_reward/mean": 0.5446428656578064, "rewards/simpleverify_reward/std": 0.49828118085861206, "step": 555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3903.0, "completions/mean_length": 602.4464721679688, "completions/mean_terminated_length": 555.0226440429688, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.05189320360032433, "grad_norm": 0.12419543415307999, "learning_rate": 2e-07, "loss": 0.0265, "num_tokens": 355031057.0, "reward": 0.578125, "reward_std": 0.19723327457904816, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4037.0, "completions/mean_length": 615.7745971679688, "completions/mean_terminated_length": 556.5198974609375, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.051986536700324916, "grad_norm": 0.11977619677782059, "learning_rate": 2e-07, "loss": 0.0294, "num_tokens": 355672031.0, "reward": 0.5814732313156128, "reward_std": 0.19723255932331085, "rewards/simpleverify_reward/mean": 0.5814732313156128, "rewards/simpleverify_reward/std": 0.4935929775238037, "step": 557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3708.0, "completions/mean_length": 585.1629638671875, "completions/mean_terminated_length": 549.5399780273438, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.0520798698003255, "grad_norm": 0.12382011860609055, "learning_rate": 2e-07, "loss": 0.0128, "num_tokens": 356280705.0, "reward": 0.5892857313156128, "reward_std": 0.18851056694984436, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3911.0, "completions/mean_length": 688.7745971679688, "completions/mean_terminated_length": 614.957763671875, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.05217320290032608, "grad_norm": 0.126596137881279, "learning_rate": 2e-07, "loss": 0.0269, "num_tokens": 356993967.0, "reward": 0.5078125, "reward_std": 0.23289088904857635, "rewards/simpleverify_reward/mean": 0.5078125, "rewards/simpleverify_reward/std": 0.5002182126045227, "step": 559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2390.0, "completions/mean_length": 597.6674194335938, "completions/mean_terminated_length": 562.1713256835938, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 0.052266536000326665, "grad_norm": 0.11852888017892838, "learning_rate": 2e-07, "loss": 0.0275, "num_tokens": 357620941.0, "reward": 0.5390625, "reward_std": 0.2159096598625183, "rewards/simpleverify_reward/mean": 0.5390625, "rewards/simpleverify_reward/std": 0.4987502098083496, "step": 560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3970.0, "completions/mean_length": 508.0792541503906, "completions/mean_terminated_length": 487.9450378417969, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.05235986910032725, "grad_norm": 0.15240824222564697, "learning_rate": 2e-07, "loss": 0.0425, "num_tokens": 358154668.0, "reward": 0.6350446939468384, "reward_std": 0.19508682191371918, "rewards/simpleverify_reward/mean": 0.6350446343421936, "rewards/simpleverify_reward/std": 0.4816865026950836, "step": 561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3237.0, "completions/mean_length": 609.646240234375, "completions/mean_terminated_length": 566.31298828125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.052453202200327835, "grad_norm": 0.11604447662830353, "learning_rate": 2e-07, "loss": 0.0172, "num_tokens": 358787927.0, "reward": 0.606026828289032, "reward_std": 0.16953741014003754, "rewards/simpleverify_reward/mean": 0.6060267686843872, "rewards/simpleverify_reward/std": 0.48890194296836853, "step": 562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2265.0, "completions/mean_length": 607.1295166015625, "completions/mean_terminated_length": 555.764404296875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.05254653530032841, "grad_norm": 0.11798493564128876, "learning_rate": 2e-07, "loss": 0.0252, "num_tokens": 359430755.0, "reward": 0.543526828289032, "reward_std": 0.20377102494239807, "rewards/simpleverify_reward/mean": 0.5435267686843872, "rewards/simpleverify_reward/std": 0.49838000535964966, "step": 563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2998.0, "completions/mean_length": 580.7210083007812, "completions/mean_terminated_length": 549.0518188476562, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.052639868400329, "grad_norm": 0.13548308610916138, "learning_rate": 2e-07, "loss": 0.0192, "num_tokens": 360038681.0, "reward": 0.5558035969734192, "reward_std": 0.20917478203773499, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715369939804077, "step": 564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3959.0, "completions/mean_length": 569.4163208007812, "completions/mean_terminated_length": 541.64794921875, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.052733201500329584, "grad_norm": 0.12500417232513428, "learning_rate": 2e-07, "loss": 0.025, "num_tokens": 360647310.0, "reward": 0.582589328289032, "reward_std": 0.1784423440694809, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.4934072494506836, "step": 565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2537.0, "completions/mean_length": 620.3616333007812, "completions/mean_terminated_length": 565.1927490234375, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 0.05282653460033017, "grad_norm": 0.11948613077402115, "learning_rate": 2e-07, "loss": 0.033, "num_tokens": 361294858.0, "reward": 0.5479910969734192, "reward_std": 0.2084682732820511, "rewards/simpleverify_reward/mean": 0.5479910969734192, "rewards/simpleverify_reward/std": 0.49796947836875916, "step": 566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3981.0, "completions/mean_length": 647.7098388671875, "completions/mean_terminated_length": 600.9004516601562, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.05291986770033075, "grad_norm": 0.1160811185836792, "learning_rate": 2e-07, "loss": 0.006, "num_tokens": 361968622.0, "reward": 0.5770089626312256, "reward_std": 0.19779528677463531, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099617958069, "step": 567 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3052.0, "completions/mean_length": 581.1685791015625, "completions/mean_terminated_length": 537.4813842773438, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.05301320080033133, "grad_norm": 0.12859226763248444, "learning_rate": 2e-07, "loss": 0.0204, "num_tokens": 362574285.0, "reward": 0.6194196939468384, "reward_std": 0.19779780507087708, "rewards/simpleverify_reward/mean": 0.6194196343421936, "rewards/simpleverify_reward/std": 0.48580074310302734, "step": 568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3123.0, "completions/mean_length": 582.474365234375, "completions/mean_terminated_length": 522.6527099609375, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 0.05310653390033192, "grad_norm": 0.1206410825252533, "learning_rate": 2e-07, "loss": 0.016, "num_tokens": 363185478.0, "reward": 0.578125, "reward_std": 0.19854526221752167, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3731.0, "completions/mean_length": 606.739990234375, "completions/mean_terminated_length": 555.3692016601562, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.053199867000332496, "grad_norm": 0.12759262323379517, "learning_rate": 2e-07, "loss": 0.0118, "num_tokens": 363814149.0, "reward": 0.5870535969734192, "reward_std": 0.22698719799518585, "rewards/simpleverify_reward/mean": 0.5870535969734192, "rewards/simpleverify_reward/std": 0.49263837933540344, "step": 570 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2866.0, "completions/mean_length": 623.0714721679688, "completions/mean_terminated_length": 583.8735961914062, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.05329320010033308, "grad_norm": 0.12601329386234283, "learning_rate": 2e-07, "loss": 0.0322, "num_tokens": 364469277.0, "reward": 0.5558035969734192, "reward_std": 0.2428891807794571, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715372920036316, "step": 571 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 698.6361694335938, "completions/mean_terminated_length": 621.0707397460938, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.053386533200333666, "grad_norm": 0.10547295957803726, "learning_rate": 2e-07, "loss": 0.0199, "num_tokens": 365187423.0, "reward": 0.4776785969734192, "reward_std": 0.21647420525550842, "rewards/simpleverify_reward/mean": 0.4776785671710968, "rewards/simpleverify_reward/std": 0.4997805058956146, "step": 572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2132.0, "completions/mean_length": 581.7310791015625, "completions/mean_terminated_length": 538.0508422851562, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.05347986630033425, "grad_norm": 0.12488096207380295, "learning_rate": 2e-07, "loss": 0.004, "num_tokens": 365801190.0, "reward": 0.5535714626312256, "reward_std": 0.18866251409053802, "rewards/simpleverify_reward/mean": 0.5535714030265808, "rewards/simpleverify_reward/std": 0.4973995089530945, "step": 573 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3381.0, "completions/mean_length": 648.6015625, "completions/mean_terminated_length": 597.8471069335938, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.05357319940033483, "grad_norm": 0.12697665393352509, "learning_rate": 2e-07, "loss": 0.0303, "num_tokens": 366464257.0, "reward": 0.5368303656578064, "reward_std": 0.22770145535469055, "rewards/simpleverify_reward/mean": 0.5368303656578064, "rewards/simpleverify_reward/std": 0.49892017245292664, "step": 574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2087.0, "completions/mean_length": 625.5736694335938, "completions/mean_terminated_length": 566.48583984375, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.053666532500335415, "grad_norm": 0.12527042627334595, "learning_rate": 2e-07, "loss": 0.0358, "num_tokens": 367109211.0, "reward": 0.566964328289032, "reward_std": 0.21421580016613007, "rewards/simpleverify_reward/mean": 0.5669642686843872, "rewards/simpleverify_reward/std": 0.49577224254608154, "step": 575 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3425.0, "completions/mean_length": 618.2142944335938, "completions/mean_terminated_length": 559.0011596679688, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.053759865600336, "grad_norm": 0.11679767072200775, "learning_rate": 2e-07, "loss": 0.0159, "num_tokens": 367751931.0, "reward": 0.5301339626312256, "reward_std": 0.18370412290096283, "rewards/simpleverify_reward/mean": 0.5301339030265808, "rewards/simpleverify_reward/std": 0.49936985969543457, "step": 576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2316.0, "completions/mean_length": 612.6395263671875, "completions/mean_terminated_length": 553.3314819335938, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.053853198700336585, "grad_norm": 0.11011343449354172, "learning_rate": 2e-07, "loss": 0.0216, "num_tokens": 368379456.0, "reward": 0.5970982313156128, "reward_std": 0.20357558131217957, "rewards/simpleverify_reward/mean": 0.5970982313156128, "rewards/simpleverify_reward/std": 0.4907552897930145, "step": 577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3315.0, "completions/mean_length": 612.9152221679688, "completions/mean_terminated_length": 569.6226196289062, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.053946531800337164, "grad_norm": 0.1258300393819809, "learning_rate": 2e-07, "loss": 0.03, "num_tokens": 369008580.0, "reward": 0.5714285969734192, "reward_std": 0.232962965965271, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514803290367126, "step": 578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2170.0, "completions/mean_length": 658.4152221679688, "completions/mean_terminated_length": 575.9131469726562, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.05403986490033775, "grad_norm": 0.11655788868665695, "learning_rate": 2e-07, "loss": 0.0305, "num_tokens": 369684920.0, "reward": 0.5267857313156128, "reward_std": 0.1899747997522354, "rewards/simpleverify_reward/mean": 0.5267857313156128, "rewards/simpleverify_reward/std": 0.4995608627796173, "step": 579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2599.0, "completions/mean_length": 670.40625, "completions/mean_terminated_length": 588.1920166015625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.054133198000338334, "grad_norm": 0.11913115531206131, "learning_rate": 2e-07, "loss": 0.0271, "num_tokens": 370377716.0, "reward": 0.5178571939468384, "reward_std": 0.2112809270620346, "rewards/simpleverify_reward/mean": 0.5178571343421936, "rewards/simpleverify_reward/std": 0.4999600946903229, "step": 580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3386.0, "completions/mean_length": 633.3761596679688, "completions/mean_terminated_length": 590.337890625, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.05422653110033892, "grad_norm": 0.11710347980260849, "learning_rate": 2e-07, "loss": 0.0347, "num_tokens": 371038037.0, "reward": 0.5223214626312256, "reward_std": 0.20433583855628967, "rewards/simpleverify_reward/mean": 0.5223214030265808, "rewards/simpleverify_reward/std": 0.49978047609329224, "step": 581 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0033482142857143016, "completions/max_length": 4096.0, "completions/max_terminated_length": 3468.0, "completions/mean_length": 533.6908569335938, "completions/mean_terminated_length": 521.7234497070312, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.0543198642003395, "grad_norm": 0.13417790830135345, "learning_rate": 2e-07, "loss": 0.0188, "num_tokens": 371599400.0, "reward": 0.6484375, "reward_std": 0.20110353827476501, "rewards/simpleverify_reward/mean": 0.6484375, "rewards/simpleverify_reward/std": 0.4777248501777649, "step": 582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3593.0, "completions/mean_length": 628.677490234375, "completions/mean_terminated_length": 561.618896484375, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.05441319730034008, "grad_norm": 0.13010862469673157, "learning_rate": 2e-07, "loss": 0.0356, "num_tokens": 372253079.0, "reward": 0.6049107313156128, "reward_std": 0.21394728124141693, "rewards/simpleverify_reward/mean": 0.6049107313156128, "rewards/simpleverify_reward/std": 0.48914292454719543, "step": 583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 1890.0, "completions/mean_length": 592.458740234375, "completions/mean_terminated_length": 548.911865234375, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.05450653040034067, "grad_norm": 0.11604160815477371, "learning_rate": 2e-07, "loss": 0.0232, "num_tokens": 372881354.0, "reward": 0.5569196939468384, "reward_std": 0.17217418551445007, "rewards/simpleverify_reward/mean": 0.5569196343421936, "rewards/simpleverify_reward/std": 0.4970270097255707, "step": 584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2274.0, "completions/mean_length": 623.2042846679688, "completions/mean_terminated_length": 568.0805053710938, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 0.054599863500341246, "grad_norm": 0.12564797699451447, "learning_rate": 2e-07, "loss": 0.0256, "num_tokens": 373532297.0, "reward": 0.5178571939468384, "reward_std": 0.2185368835926056, "rewards/simpleverify_reward/mean": 0.5178571343421936, "rewards/simpleverify_reward/std": 0.4999600946903229, "step": 585 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2900.0, "completions/mean_length": 626.1239013671875, "completions/mean_terminated_length": 567.04541015625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.05469319660034183, "grad_norm": 0.1198698952794075, "learning_rate": 2e-07, "loss": 0.0188, "num_tokens": 374173048.0, "reward": 0.6104910969734192, "reward_std": 0.21996724605560303, "rewards/simpleverify_reward/mean": 0.6104910969734192, "rewards/simpleverify_reward/std": 0.48791125416755676, "step": 586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3036.0, "completions/mean_length": 588.9364013671875, "completions/mean_terminated_length": 557.3412475585938, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.054786529700342416, "grad_norm": 0.12188099324703217, "learning_rate": 2e-07, "loss": 0.0096, "num_tokens": 374795863.0, "reward": 0.6049107313156128, "reward_std": 0.19415001571178436, "rewards/simpleverify_reward/mean": 0.6049107313156128, "rewards/simpleverify_reward/std": 0.48914292454719543, "step": 587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3569.0, "completions/mean_length": 659.2064819335938, "completions/mean_terminated_length": 556.4976806640625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.054879862800343, "grad_norm": 0.13495682179927826, "learning_rate": 2e-07, "loss": 0.0372, "num_tokens": 375483144.0, "reward": 0.535714328289032, "reward_std": 0.23157566785812378, "rewards/simpleverify_reward/mean": 0.5357142686843872, "rewards/simpleverify_reward/std": 0.4990014135837555, "step": 588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3786.0, "completions/mean_length": 615.6964721679688, "completions/mean_terminated_length": 576.4153442382812, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.05497319590034358, "grad_norm": 0.10832487046718597, "learning_rate": 2e-07, "loss": 0.0191, "num_tokens": 376119232.0, "reward": 0.5178571939468384, "reward_std": 0.16920039057731628, "rewards/simpleverify_reward/mean": 0.5178571343421936, "rewards/simpleverify_reward/std": 0.4999600946903229, "step": 589 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3856.0, "completions/mean_length": 613.9486694335938, "completions/mean_terminated_length": 542.5626831054688, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.055066529000344165, "grad_norm": 0.132821723818779, "learning_rate": 2e-07, "loss": 0.0251, "num_tokens": 376761570.0, "reward": 0.551339328289032, "reward_std": 0.2078292816877365, "rewards/simpleverify_reward/mean": 0.5513392686843872, "rewards/simpleverify_reward/std": 0.4976350665092468, "step": 590 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2583.0, "completions/mean_length": 658.578125, "completions/mean_terminated_length": 607.9705200195312, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.05515986210034475, "grad_norm": 0.10809297859668732, "learning_rate": 2e-07, "loss": 0.0193, "num_tokens": 377441696.0, "reward": 0.5301339626312256, "reward_std": 0.18952901661396027, "rewards/simpleverify_reward/mean": 0.5301339030265808, "rewards/simpleverify_reward/std": 0.49936985969543457, "step": 591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 647.2611694335938, "completions/mean_terminated_length": 568.5228271484375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.055253195200345336, "grad_norm": 0.13551926612854004, "learning_rate": 2e-07, "loss": 0.026, "num_tokens": 378110714.0, "reward": 0.5133928656578064, "reward_std": 0.23330700397491455, "rewards/simpleverify_reward/mean": 0.5133928656578064, "rewards/simpleverify_reward/std": 0.500099778175354, "step": 592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4032.0, "completions/mean_length": 663.2600708007812, "completions/mean_terminated_length": 572.8213500976562, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.055346528300345914, "grad_norm": 0.10831553488969803, "learning_rate": 2e-07, "loss": 0.0249, "num_tokens": 378794027.0, "reward": 0.5502232313156128, "reward_std": 0.18347519636154175, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3061.0, "completions/mean_length": 603.546875, "completions/mean_terminated_length": 536.0022583007812, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.0554398614003465, "grad_norm": 0.12552040815353394, "learning_rate": 2e-07, "loss": 0.0189, "num_tokens": 379419309.0, "reward": 0.6104910969734192, "reward_std": 0.21722419559955597, "rewards/simpleverify_reward/mean": 0.6104910969734192, "rewards/simpleverify_reward/std": 0.48791128396987915, "step": 594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4032.0, "completions/mean_length": 664.7589721679688, "completions/mean_terminated_length": 586.4200439453125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.055533194500347084, "grad_norm": 0.11068172007799149, "learning_rate": 2e-07, "loss": 0.0245, "num_tokens": 380106317.0, "reward": 0.5613839626312256, "reward_std": 0.2070065289735794, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3994.0, "completions/mean_length": 680.9453125, "completions/mean_terminated_length": 578.88623046875, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.05562652760034766, "grad_norm": 0.12762179970741272, "learning_rate": 2e-07, "loss": 0.0354, "num_tokens": 380809524.0, "reward": 0.5770089626312256, "reward_std": 0.22308118641376495, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099617958069, "step": 596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3997.0, "completions/mean_length": 636.9085083007812, "completions/mean_terminated_length": 582.0022583007812, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.05571986070034825, "grad_norm": 0.11421423405408859, "learning_rate": 2e-07, "loss": 0.0158, "num_tokens": 381466074.0, "reward": 0.546875, "reward_std": 0.18201276659965515, "rewards/simpleverify_reward/mean": 0.546875, "rewards/simpleverify_reward/std": 0.4980759024620056, "step": 597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2885.0, "completions/mean_length": 618.8671875, "completions/mean_terminated_length": 543.535888671875, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 0.05581319380034883, "grad_norm": 0.11620396375656128, "learning_rate": 2e-07, "loss": 0.069, "num_tokens": 382120203.0, "reward": 0.5491071939468384, "reward_std": 0.1970057189464569, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4050.0, "completions/mean_length": 610.6964721679688, "completions/mean_terminated_length": 575.3325805664062, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.05590652690034942, "grad_norm": 0.12711577117443085, "learning_rate": 2e-07, "loss": 0.0244, "num_tokens": 382752195.0, "reward": 0.5792410969734192, "reward_std": 0.22796499729156494, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49395665526390076, "step": 599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2255.0, "completions/mean_length": 619.7142944335938, "completions/mean_terminated_length": 544.4013671875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.055999860000349996, "grad_norm": 0.12158510833978653, "learning_rate": 2e-07, "loss": 0.0337, "num_tokens": 383399675.0, "reward": 0.5803571939468384, "reward_std": 0.2072986662387848, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761127948761, "step": 600 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2837.0, "completions/mean_length": 626.958740234375, "completions/mean_terminated_length": 583.8406982421875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.05609319310035058, "grad_norm": 0.12238435447216034, "learning_rate": 2e-07, "loss": 0.0079, "num_tokens": 384049086.0, "reward": 0.5033482313156128, "reward_std": 0.18355439603328705, "rewards/simpleverify_reward/mean": 0.5033482313156128, "rewards/simpleverify_reward/std": 0.5002680420875549, "step": 601 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3224.0, "completions/mean_length": 598.1953125, "completions/mean_terminated_length": 542.6746215820312, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.05618652620035117, "grad_norm": 0.11682475358247757, "learning_rate": 2e-07, "loss": 0.028, "num_tokens": 384679261.0, "reward": 0.574776828289032, "reward_std": 0.172803595662117, "rewards/simpleverify_reward/mean": 0.5747767686843872, "rewards/simpleverify_reward/std": 0.49465295672416687, "step": 602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2463.0, "completions/mean_length": 572.2377319335938, "completions/mean_terminated_length": 540.4921264648438, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.05627985930035175, "grad_norm": 0.11567160487174988, "learning_rate": 2e-07, "loss": 0.0207, "num_tokens": 385276930.0, "reward": 0.5792410969734192, "reward_std": 0.18419378995895386, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49395665526390076, "step": 603 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2384.0, "completions/mean_length": 623.6875, "completions/mean_terminated_length": 552.5011596679688, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.05637319240035233, "grad_norm": 0.11994556337594986, "learning_rate": 2e-07, "loss": 0.0417, "num_tokens": 385928330.0, "reward": 0.5814732313156128, "reward_std": 0.18524616956710815, "rewards/simpleverify_reward/mean": 0.5814732313156128, "rewards/simpleverify_reward/std": 0.4935929775238037, "step": 604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3413.0, "completions/mean_length": 702.7299194335938, "completions/mean_terminated_length": 581.1213989257812, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.056466525500352915, "grad_norm": 0.11889339983463287, "learning_rate": 2e-07, "loss": 0.0278, "num_tokens": 386649152.0, "reward": 0.4921875298023224, "reward_std": 0.2296210527420044, "rewards/simpleverify_reward/mean": 0.4921875, "rewards/simpleverify_reward/std": 0.5002182126045227, "step": 605 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3716.0, "completions/mean_length": 637.4296875, "completions/mean_terminated_length": 566.5250854492188, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.0565598586003535, "grad_norm": 0.10627752542495728, "learning_rate": 2e-07, "loss": 0.014, "num_tokens": 387314337.0, "reward": 0.5558035969734192, "reward_std": 0.1745292842388153, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715369939804077, "step": 606 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 587.810302734375, "completions/mean_terminated_length": 556.2049560546875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.056653191700354086, "grad_norm": 0.12059495598077774, "learning_rate": 2e-07, "loss": 0.0273, "num_tokens": 387924375.0, "reward": 0.5558035969734192, "reward_std": 0.20136500895023346, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715372920036316, "step": 607 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2742.0, "completions/mean_length": 656.7377319335938, "completions/mean_terminated_length": 586.2289428710938, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.056746524800354664, "grad_norm": 0.12503787875175476, "learning_rate": 2e-07, "loss": 0.0217, "num_tokens": 388617260.0, "reward": 0.5803571939468384, "reward_std": 0.22048857808113098, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761425971985, "step": 608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3621.0, "completions/mean_length": 603.2957763671875, "completions/mean_terminated_length": 567.8568115234375, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.05683985790035525, "grad_norm": 0.116832435131073, "learning_rate": 2e-07, "loss": 0.0259, "num_tokens": 389251613.0, "reward": 0.5580357313156128, "reward_std": 0.21621206402778625, "rewards/simpleverify_reward/mean": 0.5580357313156128, "rewards/simpleverify_reward/std": 0.49689778685569763, "step": 609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2308.0, "completions/mean_length": 640.9788208007812, "completions/mean_terminated_length": 570.14697265625, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.056933191000355834, "grad_norm": 0.12268880009651184, "learning_rate": 2e-07, "loss": 0.0342, "num_tokens": 389921154.0, "reward": 0.6049107313156128, "reward_std": 0.2210524082183838, "rewards/simpleverify_reward/mean": 0.6049107313156128, "rewards/simpleverify_reward/std": 0.48914292454719543, "step": 610 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2613.0, "completions/mean_length": 648.8995971679688, "completions/mean_terminated_length": 606.0542602539062, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.05702652410035641, "grad_norm": 0.11804910004138947, "learning_rate": 2e-07, "loss": 0.0116, "num_tokens": 390590872.0, "reward": 0.5792410969734192, "reward_std": 0.19325028359889984, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49395665526390076, "step": 611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3120.0, "completions/mean_length": 579.0725708007812, "completions/mean_terminated_length": 539.3781127929688, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.057119857200357, "grad_norm": 0.1251228004693985, "learning_rate": 2e-07, "loss": 0.0159, "num_tokens": 391197729.0, "reward": 0.6071428656578064, "reward_std": 0.22980578243732452, "rewards/simpleverify_reward/mean": 0.6071428656578064, "rewards/simpleverify_reward/std": 0.48865827918052673, "step": 612 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2842.0, "completions/mean_length": 625.2176513671875, "completions/mean_terminated_length": 590.0010986328125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.05721319030035758, "grad_norm": 0.11870034784078598, "learning_rate": 2e-07, "loss": 0.013, "num_tokens": 391845476.0, "reward": 0.5524553656578064, "reward_std": 0.208427295088768, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3989.0, "completions/mean_length": 680.7678833007812, "completions/mean_terminated_length": 618.6727294921875, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.05730652340035817, "grad_norm": 0.11525920778512955, "learning_rate": 2e-07, "loss": 0.0236, "num_tokens": 392546668.0, "reward": 0.5189732313156128, "reward_std": 0.23300346732139587, "rewards/simpleverify_reward/mean": 0.5189732313156128, "rewards/simpleverify_reward/std": 0.49991893768310547, "step": 614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4059.0, "completions/mean_length": 626.828125, "completions/mean_terminated_length": 555.7061767578125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.05739985650035875, "grad_norm": 0.11456350982189178, "learning_rate": 2e-07, "loss": 0.0221, "num_tokens": 393197234.0, "reward": 0.578125, "reward_std": 0.20278717577457428, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3380.0, "completions/mean_length": 619.5904541015625, "completions/mean_terminated_length": 568.4088134765625, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.05749318960035933, "grad_norm": 0.12592877447605133, "learning_rate": 2e-07, "loss": 0.0472, "num_tokens": 393840755.0, "reward": 0.5491071939468384, "reward_std": 0.1782582849264145, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3150.0, "completions/mean_length": 646.3605346679688, "completions/mean_terminated_length": 583.6397705078125, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.05758652270035992, "grad_norm": 0.12221542000770569, "learning_rate": 2e-07, "loss": 0.0281, "num_tokens": 394501150.0, "reward": 0.5725446939468384, "reward_std": 0.24186930060386658, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4078.0, "completions/mean_length": 581.234375, "completions/mean_terminated_length": 537.5480346679688, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.0576798558003605, "grad_norm": 0.12779873609542847, "learning_rate": 2e-07, "loss": 0.015, "num_tokens": 395118320.0, "reward": 0.6015625, "reward_std": 0.19558033347129822, "rewards/simpleverify_reward/mean": 0.6015625, "rewards/simpleverify_reward/std": 0.48984986543655396, "step": 618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 617.9732666015625, "completions/mean_terminated_length": 586.6396484375, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.05777318890036108, "grad_norm": 0.13142085075378418, "learning_rate": 2e-07, "loss": 0.0235, "num_tokens": 395761912.0, "reward": 0.5658482313156128, "reward_std": 0.2570872902870178, "rewards/simpleverify_reward/mean": 0.5658482313156128, "rewards/simpleverify_reward/std": 0.49592188000679016, "step": 619 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2940.0, "completions/mean_length": 632.3828125, "completions/mean_terminated_length": 557.3443603515625, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.057866522000361666, "grad_norm": 0.13259746134281158, "learning_rate": 2e-07, "loss": 0.0333, "num_tokens": 396429639.0, "reward": 0.5580357313156128, "reward_std": 0.24694563448429108, "rewards/simpleverify_reward/mean": 0.5580357313156128, "rewards/simpleverify_reward/std": 0.49689781665802, "step": 620 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3587.0, "completions/mean_length": 718.2098388671875, "completions/mean_terminated_length": 641.09130859375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.05795985510036225, "grad_norm": 0.10511068999767303, "learning_rate": 2e-07, "loss": 0.0192, "num_tokens": 397173995.0, "reward": 0.4955357313156128, "reward_std": 0.17762142419815063, "rewards/simpleverify_reward/mean": 0.4955357015132904, "rewards/simpleverify_reward/std": 0.500259280204773, "step": 621 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3380.0, "completions/mean_length": 606.6082763671875, "completions/mean_terminated_length": 559.240966796875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.05805318820036283, "grad_norm": 0.12619547545909882, "learning_rate": 2e-07, "loss": 0.0192, "num_tokens": 397810908.0, "reward": 0.5636160969734192, "reward_std": 0.2177102267742157, "rewards/simpleverify_reward/mean": 0.5636160969734192, "rewards/simpleverify_reward/std": 0.49621346592903137, "step": 622 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 612.4721069335938, "completions/mean_terminated_length": 561.1857299804688, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.058146521300363414, "grad_norm": 0.10668329894542694, "learning_rate": 2e-07, "loss": 0.0134, "num_tokens": 398450827.0, "reward": 0.5345982313156128, "reward_std": 0.16532759368419647, "rewards/simpleverify_reward/mean": 0.5345982313156128, "rewards/simpleverify_reward/std": 0.4990801215171814, "step": 623 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4080.0, "completions/mean_length": 611.5346069335938, "completions/mean_terminated_length": 560.2344360351562, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.058239854400364, "grad_norm": 0.12342949956655502, "learning_rate": 2e-07, "loss": 0.0509, "num_tokens": 399091594.0, "reward": 0.582589328289032, "reward_std": 0.20403386652469635, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.493407279253006, "step": 624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3856.0, "completions/mean_length": 635.591552734375, "completions/mean_terminated_length": 592.580810546875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.058333187500364585, "grad_norm": 0.12455419450998306, "learning_rate": 2e-07, "loss": 0.0262, "num_tokens": 399744332.0, "reward": 0.546875, "reward_std": 0.2070058435201645, "rewards/simpleverify_reward/mean": 0.546875, "rewards/simpleverify_reward/std": 0.4980759024620056, "step": 625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3213.0, "completions/mean_length": 595.4308471679688, "completions/mean_terminated_length": 567.8673095703125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.05842652060036516, "grad_norm": 0.12367159873247147, "learning_rate": 2e-07, "loss": 0.0468, "num_tokens": 400372734.0, "reward": 0.5625, "reward_std": 0.2076042741537094, "rewards/simpleverify_reward/mean": 0.5625, "rewards/simpleverify_reward/std": 0.49635544419288635, "step": 626 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2812.0, "completions/mean_length": 624.7489013671875, "completions/mean_terminated_length": 569.649658203125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.05851985370036575, "grad_norm": 0.12630349397659302, "learning_rate": 2e-07, "loss": 0.0471, "num_tokens": 401021989.0, "reward": 0.598214328289032, "reward_std": 0.20857886970043182, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053290486335754, "step": 627 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3667.0, "completions/mean_length": 697.4955444335938, "completions/mean_terminated_length": 591.9033203125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.05861318680036633, "grad_norm": 0.12085574120283127, "learning_rate": 2e-07, "loss": 0.0221, "num_tokens": 401730657.0, "reward": 0.6004464626312256, "reward_std": 0.22375448048114777, "rewards/simpleverify_reward/mean": 0.6004464030265808, "rewards/simpleverify_reward/std": 0.49008017778396606, "step": 628 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3240.0, "completions/mean_length": 638.75, "completions/mean_terminated_length": 571.88623046875, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.05870651990036692, "grad_norm": 0.11959777772426605, "learning_rate": 2e-07, "loss": 0.0347, "num_tokens": 402399265.0, "reward": 0.5401785969734192, "reward_std": 0.22785300016403198, "rewards/simpleverify_reward/mean": 0.5401785969734192, "rewards/simpleverify_reward/std": 0.49866142868995667, "step": 629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3852.0, "completions/mean_length": 629.203125, "completions/mean_terminated_length": 570.1771240234375, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.0587998530003675, "grad_norm": 0.13663065433502197, "learning_rate": 2e-07, "loss": 0.0302, "num_tokens": 403054039.0, "reward": 0.515625, "reward_std": 0.2248835563659668, "rewards/simpleverify_reward/mean": 0.515625, "rewards/simpleverify_reward/std": 0.5000349283218384, "step": 630 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2333.0, "completions/mean_length": 614.3314819335938, "completions/mean_terminated_length": 538.9019165039062, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.05889318610036808, "grad_norm": 0.10506126284599304, "learning_rate": 2e-07, "loss": 0.0122, "num_tokens": 403689592.0, "reward": 0.606026828289032, "reward_std": 0.15822389721870422, "rewards/simpleverify_reward/mean": 0.6060267686843872, "rewards/simpleverify_reward/std": 0.48890194296836853, "step": 631 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3210.0, "completions/mean_length": 573.2131958007812, "completions/mean_terminated_length": 505.0819091796875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.05898651920036867, "grad_norm": 0.1309705525636673, "learning_rate": 2e-07, "loss": 0.0358, "num_tokens": 404279791.0, "reward": 0.6383928656578064, "reward_std": 0.21613538265228271, "rewards/simpleverify_reward/mean": 0.6383928656578064, "rewards/simpleverify_reward/std": 0.4807341992855072, "step": 632 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3857.0, "completions/mean_length": 610.372802734375, "completions/mean_terminated_length": 563.0565795898438, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.05907985230036925, "grad_norm": 0.11533993482589722, "learning_rate": 2e-07, "loss": 0.023, "num_tokens": 404906701.0, "reward": 0.5502232313156128, "reward_std": 0.19855231046676636, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2871.0, "completions/mean_length": 621.9342041015625, "completions/mean_terminated_length": 570.7870483398438, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.05917318540036983, "grad_norm": 0.1234879121184349, "learning_rate": 2e-07, "loss": 0.0294, "num_tokens": 405545338.0, "reward": 0.5714285969734192, "reward_std": 0.19843651354312897, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514803290367126, "step": 634 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3393.0, "completions/mean_length": 615.8158569335938, "completions/mean_terminated_length": 548.508544921875, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.059266518500370416, "grad_norm": 0.11166516691446304, "learning_rate": 2e-07, "loss": 0.0291, "num_tokens": 406184301.0, "reward": 0.5736607313156128, "reward_std": 0.17197804152965546, "rewards/simpleverify_reward/mean": 0.5736607313156128, "rewards/simpleverify_reward/std": 0.4948205351829529, "step": 635 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3682.0, "completions/mean_length": 667.5078125, "completions/mean_terminated_length": 577.1810302734375, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.059359851600371, "grad_norm": 0.12915107607841492, "learning_rate": 2e-07, "loss": 0.0459, "num_tokens": 406883380.0, "reward": 0.4899553656578064, "reward_std": 0.23762626945972443, "rewards/simpleverify_reward/mean": 0.4899553656578064, "rewards/simpleverify_reward/std": 0.5001782774925232, "step": 636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 660.950927734375, "completions/mean_terminated_length": 606.4263305664062, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.05945318470037158, "grad_norm": 0.11969389766454697, "learning_rate": 2e-07, "loss": 0.0281, "num_tokens": 407566848.0, "reward": 0.5212053656578064, "reward_std": 0.2386365830898285, "rewards/simpleverify_reward/mean": 0.5212053656578064, "rewards/simpleverify_reward/std": 0.49982914328575134, "step": 637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3332.0, "completions/mean_length": 653.2879638671875, "completions/mean_terminated_length": 586.705322265625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.059546517800372165, "grad_norm": 0.11193221807479858, "learning_rate": 2e-07, "loss": 0.0414, "num_tokens": 408230066.0, "reward": 0.5189732313156128, "reward_std": 0.19648300111293793, "rewards/simpleverify_reward/mean": 0.5189732313156128, "rewards/simpleverify_reward/std": 0.49991893768310547, "step": 638 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3786.0, "completions/mean_length": 646.7533569335938, "completions/mean_terminated_length": 580.0443725585938, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.05963985090037275, "grad_norm": 0.11117948591709137, "learning_rate": 2e-07, "loss": 0.0206, "num_tokens": 408902853.0, "reward": 0.5479910969734192, "reward_std": 0.18434211611747742, "rewards/simpleverify_reward/mean": 0.5479910969734192, "rewards/simpleverify_reward/std": 0.49796950817108154, "step": 639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3319.0, "completions/mean_length": 605.646240234375, "completions/mean_terminated_length": 558.265869140625, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.059733184000373335, "grad_norm": 0.12911492586135864, "learning_rate": 2e-07, "loss": 0.0057, "num_tokens": 409542576.0, "reward": 0.4933035969734192, "reward_std": 0.20305706560611725, "rewards/simpleverify_reward/mean": 0.4933035671710968, "rewards/simpleverify_reward/std": 0.5002344250679016, "step": 640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3218.0, "completions/mean_length": 577.7366333007812, "completions/mean_terminated_length": 557.9932861328125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.05982651710037391, "grad_norm": 0.1332138627767563, "learning_rate": 2e-07, "loss": 0.0197, "num_tokens": 410147532.0, "reward": 0.5892857313156128, "reward_std": 0.22698718309402466, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 641 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3279.0, "completions/mean_length": 667.0870971679688, "completions/mean_terminated_length": 604.7431640625, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.0599198502003745, "grad_norm": 0.11862540990114212, "learning_rate": 2e-07, "loss": 0.0306, "num_tokens": 410842498.0, "reward": 0.4810267984867096, "reward_std": 0.21736615896224976, "rewards/simpleverify_reward/mean": 0.4810267984867096, "rewards/simpleverify_reward/std": 0.49991896748542786, "step": 642 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 601.4185791015625, "completions/mean_terminated_length": 525.709228515625, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.060013183300375084, "grad_norm": 0.12723922729492188, "learning_rate": 2e-07, "loss": 0.0562, "num_tokens": 411458713.0, "reward": 0.6160714626312256, "reward_std": 0.19520379602909088, "rewards/simpleverify_reward/mean": 0.6160714030265808, "rewards/simpleverify_reward/std": 0.486612468957901, "step": 643 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3966.0, "completions/mean_length": 607.8348388671875, "completions/mean_terminated_length": 552.4671020507812, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.06010651640037567, "grad_norm": 0.12422408163547516, "learning_rate": 2e-07, "loss": 0.0337, "num_tokens": 412098821.0, "reward": 0.5881696939468384, "reward_std": 0.21372047066688538, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924395978450775, "step": 644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2980.0, "completions/mean_length": 596.9598388671875, "completions/mean_terminated_length": 545.445068359375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.06019984950037625, "grad_norm": 0.11948264390230179, "learning_rate": 2e-07, "loss": 0.0133, "num_tokens": 412718185.0, "reward": 0.590401828289032, "reward_std": 0.1686372607946396, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 645 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 571.8114013671875, "completions/mean_terminated_length": 532.0349731445312, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.06029318260037683, "grad_norm": 0.12014669924974442, "learning_rate": 2e-07, "loss": 0.0384, "num_tokens": 413318056.0, "reward": 0.613839328289032, "reward_std": 0.19035455584526062, "rewards/simpleverify_reward/mean": 0.6138392686843872, "rewards/simpleverify_reward/std": 0.48714008927345276, "step": 646 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3208.0, "completions/mean_length": 555.818115234375, "completions/mean_terminated_length": 531.9517211914062, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.06038651570037742, "grad_norm": 0.134592667222023, "learning_rate": 2e-07, "loss": 0.023, "num_tokens": 413908941.0, "reward": 0.6127232313156128, "reward_std": 0.19276313483715057, "rewards/simpleverify_reward/mean": 0.6127232313156128, "rewards/simpleverify_reward/std": 0.4873998463153839, "step": 647 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3585.0, "completions/mean_length": 581.9308471679688, "completions/mean_terminated_length": 530.1947631835938, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.060479848800377996, "grad_norm": 0.13202019035816193, "learning_rate": 2e-07, "loss": 0.0135, "num_tokens": 414519087.0, "reward": 0.5502232313156128, "reward_std": 0.20569172501564026, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 648 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2826.0, "completions/mean_length": 633.3928833007812, "completions/mean_terminated_length": 594.3115234375, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.06057318190037858, "grad_norm": 0.11396337300539017, "learning_rate": 2e-07, "loss": 0.0074, "num_tokens": 415169127.0, "reward": 0.5479910969734192, "reward_std": 0.18231655657291412, "rewards/simpleverify_reward/mean": 0.5479910969734192, "rewards/simpleverify_reward/std": 0.49796950817108154, "step": 649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2784.0, "completions/mean_length": 571.2824096679688, "completions/mean_terminated_length": 539.5281372070312, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.060666515000379166, "grad_norm": 0.11599686741828918, "learning_rate": 2e-07, "loss": 0.0106, "num_tokens": 415774540.0, "reward": 0.5915178656578064, "reward_std": 0.17209568619728088, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 650 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2229.0, "completions/mean_length": 608.0301513671875, "completions/mean_terminated_length": 564.6768188476562, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.06075984810037975, "grad_norm": 0.12559126317501068, "learning_rate": 2e-07, "loss": 0.0586, "num_tokens": 416413543.0, "reward": 0.5881696939468384, "reward_std": 0.21361027657985687, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924395978450775, "step": 651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2769.0, "completions/mean_length": 644.4498291015625, "completions/mean_terminated_length": 577.6962280273438, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.06085318120038033, "grad_norm": 0.12779025733470917, "learning_rate": 2e-07, "loss": 0.0206, "num_tokens": 417083098.0, "reward": 0.5613839626312256, "reward_std": 0.22567589581012726, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 652 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3067.0, "completions/mean_length": 588.328125, "completions/mean_terminated_length": 544.72998046875, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.060946514300380915, "grad_norm": 0.12277545034885406, "learning_rate": 2e-07, "loss": 0.0255, "num_tokens": 417695312.0, "reward": 0.598214328289032, "reward_std": 0.18283650279045105, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053290486335754, "step": 653 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2284.0, "completions/mean_length": 588.4676513671875, "completions/mean_terminated_length": 556.8682861328125, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.0610398474003815, "grad_norm": 0.10658971220254898, "learning_rate": 2e-07, "loss": 0.0317, "num_tokens": 418314123.0, "reward": 0.6205357313156128, "reward_std": 0.16548165678977966, "rewards/simpleverify_reward/mean": 0.6205357313156128, "rewards/simpleverify_reward/std": 0.4855247139930725, "step": 654 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3433.0, "completions/mean_length": 674.1171875, "completions/mean_terminated_length": 579.9368896484375, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.061133180500382085, "grad_norm": 0.12936429679393768, "learning_rate": 2e-07, "loss": 0.0357, "num_tokens": 419009436.0, "reward": 0.546875, "reward_std": 0.215499609708786, "rewards/simpleverify_reward/mean": 0.546875, "rewards/simpleverify_reward/std": 0.4980759024620056, "step": 655 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3895.0, "completions/mean_length": 569.7020263671875, "completions/mean_terminated_length": 549.9136352539062, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.061226513600382663, "grad_norm": 0.1243654191493988, "learning_rate": 2e-07, "loss": 0.0155, "num_tokens": 419613585.0, "reward": 0.6305803656578064, "reward_std": 0.16758599877357483, "rewards/simpleverify_reward/mean": 0.6305803656578064, "rewards/simpleverify_reward/std": 0.4829172194004059, "step": 656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2887.0, "completions/mean_length": 625.3147583007812, "completions/mean_terminated_length": 582.17626953125, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.06131984670038325, "grad_norm": 0.10461804270744324, "learning_rate": 2e-07, "loss": 0.0092, "num_tokens": 420259611.0, "reward": 0.5558035969734192, "reward_std": 0.16859561204910278, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715369939804077, "step": 657 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3901.0, "completions/mean_length": 620.958740234375, "completions/mean_terminated_length": 577.76611328125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.061413179800383834, "grad_norm": 0.12151706963777542, "learning_rate": 2e-07, "loss": 0.0153, "num_tokens": 420907574.0, "reward": 0.559151828289032, "reward_std": 0.20756927132606506, "rewards/simpleverify_reward/mean": 0.5591517686843872, "rewards/simpleverify_reward/std": 0.496766060590744, "step": 658 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3937.0, "completions/mean_length": 560.7667846679688, "completions/mean_terminated_length": 528.9177856445312, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.06150651290038442, "grad_norm": 0.13278622925281525, "learning_rate": 2e-07, "loss": 0.0145, "num_tokens": 421497741.0, "reward": 0.5959821939468384, "reward_std": 0.22462141513824463, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3637.0, "completions/mean_length": 569.6395263671875, "completions/mean_terminated_length": 533.8590698242188, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.061599846000385, "grad_norm": 0.1306065171957016, "learning_rate": 2e-07, "loss": 0.0288, "num_tokens": 422092522.0, "reward": 0.5770089626312256, "reward_std": 0.20737744867801666, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099319934845, "step": 660 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2779.0, "completions/mean_length": 614.6920166015625, "completions/mean_terminated_length": 583.328857421875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.06169317910038558, "grad_norm": 0.12391114979982376, "learning_rate": 2e-07, "loss": 0.0147, "num_tokens": 422733518.0, "reward": 0.5680803656578064, "reward_std": 0.19723255932331085, "rewards/simpleverify_reward/mean": 0.5680803656578064, "rewards/simpleverify_reward/std": 0.4956200420856476, "step": 661 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4044.0, "completions/mean_length": 621.4163208007812, "completions/mean_terminated_length": 558.2420043945312, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.06178651220038617, "grad_norm": 0.117815300822258, "learning_rate": 2e-07, "loss": 0.0165, "num_tokens": 423383675.0, "reward": 0.5267857313156128, "reward_std": 0.20410944521427155, "rewards/simpleverify_reward/mean": 0.5267857313156128, "rewards/simpleverify_reward/std": 0.4995608627796173, "step": 662 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4082.0, "completions/mean_length": 582.841552734375, "completions/mean_terminated_length": 547.1950073242188, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.061879845300386746, "grad_norm": 0.1208818331360817, "learning_rate": 2e-07, "loss": 0.0226, "num_tokens": 423999997.0, "reward": 0.5714285969734192, "reward_std": 0.2119959145784378, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514803290367126, "step": 663 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3196.0, "completions/mean_length": 564.1886596679688, "completions/mean_terminated_length": 544.3692626953125, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.06197317840038733, "grad_norm": 0.13433204591274261, "learning_rate": 2e-07, "loss": 0.0448, "num_tokens": 424608318.0, "reward": 0.5714285969734192, "reward_std": 0.22950129210948944, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514803290367126, "step": 664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3184.0, "completions/mean_length": 557.0658569335938, "completions/mean_terminated_length": 537.20654296875, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.062066511500387916, "grad_norm": 0.1357741355895996, "learning_rate": 2e-07, "loss": 0.0261, "num_tokens": 425194817.0, "reward": 0.6517857313156128, "reward_std": 0.22138871252536774, "rewards/simpleverify_reward/mean": 0.6517857313156128, "rewards/simpleverify_reward/std": 0.47667041420936584, "step": 665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3458.0, "completions/mean_length": 583.6652221679688, "completions/mean_terminated_length": 548.0270385742188, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.0621598446003885, "grad_norm": 0.12754805386066437, "learning_rate": 2e-07, "loss": 0.0144, "num_tokens": 425807141.0, "reward": 0.53125, "reward_std": 0.19035570323467255, "rewards/simpleverify_reward/mean": 0.53125, "rewards/simpleverify_reward/std": 0.4993011951446533, "step": 666 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3453.0, "completions/mean_length": 598.3828125, "completions/mean_terminated_length": 550.9038696289062, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.06225317770038908, "grad_norm": 0.1142469048500061, "learning_rate": 2e-07, "loss": 0.0319, "num_tokens": 426423348.0, "reward": 0.6160714626312256, "reward_std": 0.18558205664157867, "rewards/simpleverify_reward/mean": 0.6160714030265808, "rewards/simpleverify_reward/std": 0.486612468957901, "step": 667 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3205.0, "completions/mean_length": 631.0535888671875, "completions/mean_terminated_length": 568.0545043945312, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.062346510800389665, "grad_norm": 0.12126720696687698, "learning_rate": 2e-07, "loss": 0.032, "num_tokens": 427074316.0, "reward": 0.5792410969734192, "reward_std": 0.19877490401268005, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49395665526390076, "step": 668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3379.0, "completions/mean_length": 715.7053833007812, "completions/mean_terminated_length": 606.66357421875, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.06243984390039025, "grad_norm": 0.11129731684923172, "learning_rate": 2e-07, "loss": 0.0223, "num_tokens": 427813348.0, "reward": 0.4921875298023224, "reward_std": 0.19351130723953247, "rewards/simpleverify_reward/mean": 0.4921875, "rewards/simpleverify_reward/std": 0.5002182126045227, "step": 669 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2203.0, "completions/mean_length": 588.5949096679688, "completions/mean_terminated_length": 540.9830322265625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.06253317700039084, "grad_norm": 0.12069542706012726, "learning_rate": 2e-07, "loss": 0.0138, "num_tokens": 428419145.0, "reward": 0.59375, "reward_std": 0.1823158711194992, "rewards/simpleverify_reward/mean": 0.59375, "rewards/simpleverify_reward/std": 0.4914066195487976, "step": 670 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3823.0, "completions/mean_length": 577.3515625, "completions/mean_terminated_length": 561.5728759765625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.06262651010039141, "grad_norm": 0.13155397772789001, "learning_rate": 2e-07, "loss": 0.0277, "num_tokens": 429021860.0, "reward": 0.5959821939468384, "reward_std": 0.22406010329723358, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 671 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2827.0, "completions/mean_length": 592.0748291015625, "completions/mean_terminated_length": 556.52197265625, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.062719843200392, "grad_norm": 0.12586405873298645, "learning_rate": 2e-07, "loss": 0.0127, "num_tokens": 429655535.0, "reward": 0.5424107313156128, "reward_std": 0.19234946370124817, "rewards/simpleverify_reward/mean": 0.5424107313156128, "rewards/simpleverify_reward/std": 0.4984763264656067, "step": 672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 1968.0, "completions/mean_length": 615.6986694335938, "completions/mean_terminated_length": 560.455810546875, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.06281317630039258, "grad_norm": 0.1193930059671402, "learning_rate": 2e-07, "loss": 0.0251, "num_tokens": 430302569.0, "reward": 0.5758928656578064, "reward_std": 0.1848006695508957, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49448275566101074, "step": 673 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 575.453125, "completions/mean_terminated_length": 551.7191162109375, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.06290650940039316, "grad_norm": 0.14473803341388702, "learning_rate": 2e-07, "loss": 0.0175, "num_tokens": 430904399.0, "reward": 0.6037946939468384, "reward_std": 0.19460716843605042, "rewards/simpleverify_reward/mean": 0.6037946343421936, "rewards/simpleverify_reward/std": 0.48938122391700745, "step": 674 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2176.0, "completions/mean_length": 650.7913208007812, "completions/mean_terminated_length": 600.069091796875, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.06299984250039375, "grad_norm": 0.12235885858535767, "learning_rate": 2e-07, "loss": 0.0383, "num_tokens": 431572588.0, "reward": 0.535714328289032, "reward_std": 0.2298835963010788, "rewards/simpleverify_reward/mean": 0.5357142686843872, "rewards/simpleverify_reward/std": 0.4990014135837555, "step": 675 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2420.0, "completions/mean_length": 597.794677734375, "completions/mean_terminated_length": 530.1387939453125, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 0.06309317560039433, "grad_norm": 0.11891889572143555, "learning_rate": 2e-07, "loss": 0.027, "num_tokens": 432190884.0, "reward": 0.5770089626312256, "reward_std": 0.1936621367931366, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099319934845, "step": 676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3510.0, "completions/mean_length": 669.6785888671875, "completions/mean_terminated_length": 619.2344360351562, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.06318650870039491, "grad_norm": 0.11397001892328262, "learning_rate": 2e-07, "loss": 0.0191, "num_tokens": 432888284.0, "reward": 0.5245535969734192, "reward_std": 0.21169282495975494, "rewards/simpleverify_reward/mean": 0.5245535969734192, "rewards/simpleverify_reward/std": 0.4996756613254547, "step": 677 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2353.0, "completions/mean_length": 642.810302734375, "completions/mean_terminated_length": 567.9976806640625, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.0632798418003955, "grad_norm": 0.10880357027053833, "learning_rate": 2e-07, "loss": 0.0383, "num_tokens": 433560018.0, "reward": 0.5848214626312256, "reward_std": 0.17739249765872955, "rewards/simpleverify_reward/mean": 0.5848214030265808, "rewards/simpleverify_reward/std": 0.49302801489830017, "step": 678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 600.5748291015625, "completions/mean_terminated_length": 537.0215454101562, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.06337317490039608, "grad_norm": 0.11945977807044983, "learning_rate": 2e-07, "loss": 0.0434, "num_tokens": 434188685.0, "reward": 0.6462053656578064, "reward_std": 0.22026145458221436, "rewards/simpleverify_reward/mean": 0.6462053656578064, "rewards/simpleverify_reward/std": 0.478413462638855, "step": 679 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3000.0, "completions/mean_length": 613.6272583007812, "completions/mean_terminated_length": 538.1824340820312, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.06346650800039666, "grad_norm": 0.12863190472126007, "learning_rate": 2e-07, "loss": 0.0211, "num_tokens": 434829311.0, "reward": 0.59375, "reward_std": 0.21180906891822815, "rewards/simpleverify_reward/mean": 0.59375, "rewards/simpleverify_reward/std": 0.4914066195487976, "step": 680 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3351.0, "completions/mean_length": 635.3125, "completions/mean_terminated_length": 580.3809814453125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.06355984110039725, "grad_norm": 0.11714020371437073, "learning_rate": 2e-07, "loss": -0.0034, "num_tokens": 435486351.0, "reward": 0.5491071939468384, "reward_std": 0.17394223809242249, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 681 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3550.0, "completions/mean_length": 653.8092041015625, "completions/mean_terminated_length": 591.2238159179688, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.06365317420039783, "grad_norm": 0.13012750446796417, "learning_rate": 2e-07, "loss": 0.028, "num_tokens": 436168828.0, "reward": 0.5491071939468384, "reward_std": 0.21759578585624695, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 682 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 659.5234375, "completions/mean_terminated_length": 616.8101806640625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.06374650730039842, "grad_norm": 0.11480817943811417, "learning_rate": 2e-07, "loss": 0.0336, "num_tokens": 436858617.0, "reward": 0.5345982313156128, "reward_std": 0.1929139792919159, "rewards/simpleverify_reward/mean": 0.5345982313156128, "rewards/simpleverify_reward/std": 0.4990801215171814, "step": 683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3746.0, "completions/mean_length": 635.3850708007812, "completions/mean_terminated_length": 564.4385375976562, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.063839840400399, "grad_norm": 0.12080996483564377, "learning_rate": 2e-07, "loss": 0.0319, "num_tokens": 437521450.0, "reward": 0.5613839626312256, "reward_std": 0.1944560408592224, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 684 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4076.0, "completions/mean_length": 660.138427734375, "completions/mean_terminated_length": 589.6993408203125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.06393317350039958, "grad_norm": 0.11061633378267288, "learning_rate": 2e-07, "loss": 0.0194, "num_tokens": 438196782.0, "reward": 0.5524553656578064, "reward_std": 0.19366355240345, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 685 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2593.0, "completions/mean_length": 548.7332763671875, "completions/mean_terminated_length": 520.8020629882812, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.06402650660040017, "grad_norm": 0.12615151703357697, "learning_rate": 2e-07, "loss": 0.0226, "num_tokens": 438779055.0, "reward": 0.6350446939468384, "reward_std": 0.17945334315299988, "rewards/simpleverify_reward/mean": 0.6350446343421936, "rewards/simpleverify_reward/std": 0.481686532497406, "step": 686 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3084.0, "completions/mean_length": 588.3783569335938, "completions/mean_terminated_length": 552.7880249023438, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.06411983970040075, "grad_norm": 0.12183422595262527, "learning_rate": 2e-07, "loss": 0.0095, "num_tokens": 439398554.0, "reward": 0.652901828289032, "reward_std": 0.19846788048744202, "rewards/simpleverify_reward/mean": 0.6529017686843872, "rewards/simpleverify_reward/std": 0.47631320357322693, "step": 687 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3969.0, "completions/mean_length": 595.046875, "completions/mean_terminated_length": 563.5067749023438, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.06421317280040133, "grad_norm": 0.12366434186697006, "learning_rate": 2e-07, "loss": 0.0231, "num_tokens": 440030404.0, "reward": 0.5636160969734192, "reward_std": 0.1764160841703415, "rewards/simpleverify_reward/mean": 0.5636160969734192, "rewards/simpleverify_reward/std": 0.49621346592903137, "step": 688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2870.0, "completions/mean_length": 625.896240234375, "completions/mean_terminated_length": 566.8139038085938, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.06430650590040192, "grad_norm": 0.12179087847471237, "learning_rate": 2e-07, "loss": 0.0366, "num_tokens": 440678039.0, "reward": 0.598214328289032, "reward_std": 0.18626604974269867, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053290486335754, "step": 689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 1935.0, "completions/mean_length": 588.0703125, "completions/mean_terminated_length": 548.4774169921875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.0643998390004025, "grad_norm": 0.12135112285614014, "learning_rate": 2e-07, "loss": 0.0402, "num_tokens": 441285334.0, "reward": 0.6015625, "reward_std": 0.18772706389427185, "rewards/simpleverify_reward/mean": 0.6015625, "rewards/simpleverify_reward/std": 0.48984986543655396, "step": 690 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 620.0279541015625, "completions/mean_terminated_length": 568.852783203125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.06449317210040308, "grad_norm": 0.11767952144145966, "learning_rate": 2e-07, "loss": 0.0449, "num_tokens": 441923679.0, "reward": 0.6004464626312256, "reward_std": 0.2062540203332901, "rewards/simpleverify_reward/mean": 0.6004464030265808, "rewards/simpleverify_reward/std": 0.49008017778396606, "step": 691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2034.0, "completions/mean_length": 612.6864013671875, "completions/mean_terminated_length": 553.379150390625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.06458650520040367, "grad_norm": 0.11245155334472656, "learning_rate": 2e-07, "loss": 0.013, "num_tokens": 442569942.0, "reward": 0.5569196939468384, "reward_std": 0.17006942629814148, "rewards/simpleverify_reward/mean": 0.5569196343421936, "rewards/simpleverify_reward/std": 0.49702703952789307, "step": 692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3379.0, "completions/mean_length": 661.9810791015625, "completions/mean_terminated_length": 603.5130615234375, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.06467983830040425, "grad_norm": 0.11741670221090317, "learning_rate": 2e-07, "loss": 0.0209, "num_tokens": 443254581.0, "reward": 0.5546875, "reward_std": 0.20411649346351624, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2925.0, "completions/mean_length": 657.3660888671875, "completions/mean_terminated_length": 610.6878051757812, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.06477317140040484, "grad_norm": 0.1119515672326088, "learning_rate": 2e-07, "loss": 0.0269, "num_tokens": 443928445.0, "reward": 0.5647321939468384, "reward_std": 0.1948240101337433, "rewards/simpleverify_reward/mean": 0.5647321343421936, "rewards/simpleverify_reward/std": 0.49606895446777344, "step": 694 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2788.0, "completions/mean_length": 663.185302734375, "completions/mean_terminated_length": 592.8086547851562, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.06486650450040542, "grad_norm": 0.11413503438234329, "learning_rate": 2e-07, "loss": 0.0301, "num_tokens": 444617563.0, "reward": 0.5558035969734192, "reward_std": 0.19294790923595428, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715369939804077, "step": 695 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2896.0, "completions/mean_length": 576.8292846679688, "completions/mean_terminated_length": 545.125, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.064959837600406, "grad_norm": 0.12816612422466278, "learning_rate": 2e-07, "loss": 0.0394, "num_tokens": 445219330.0, "reward": 0.5401785969734192, "reward_std": 0.1998996138572693, "rewards/simpleverify_reward/mean": 0.5401785969734192, "rewards/simpleverify_reward/std": 0.49866142868995667, "step": 696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2905.0, "completions/mean_length": 612.3203125, "completions/mean_terminated_length": 557.0238037109375, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.06505317070040659, "grad_norm": 0.13292458653450012, "learning_rate": 2e-07, "loss": 0.0186, "num_tokens": 445861625.0, "reward": 0.5714285969734192, "reward_std": 0.21658548712730408, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514803290367126, "step": 697 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3633.0, "completions/mean_length": 611.0234375, "completions/mean_terminated_length": 575.6629028320312, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.06514650380040717, "grad_norm": 0.1270810216665268, "learning_rate": 2e-07, "loss": 0.0435, "num_tokens": 446497190.0, "reward": 0.543526828289032, "reward_std": 0.21462947130203247, "rewards/simpleverify_reward/mean": 0.5435267686843872, "rewards/simpleverify_reward/std": 0.49838000535964966, "step": 698 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3710.0, "completions/mean_length": 572.2053833007812, "completions/mean_terminated_length": 524.37109375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.06523983690040774, "grad_norm": 0.12845967710018158, "learning_rate": 2e-07, "loss": 0.0415, "num_tokens": 447096614.0, "reward": 0.6082589626312256, "reward_std": 0.20820863544940948, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.4884119927883148, "step": 699 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4073.0, "completions/mean_length": 572.997802734375, "completions/mean_terminated_length": 541.259033203125, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.06533317000040834, "grad_norm": 0.11883914470672607, "learning_rate": 2e-07, "loss": 0.0373, "num_tokens": 447693764.0, "reward": 0.6238839626312256, "reward_std": 0.17622952163219452, "rewards/simpleverify_reward/mean": 0.6238839030265808, "rewards/simpleverify_reward/std": 0.4846802353858948, "step": 700 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3264.0, "completions/mean_length": 677.8504638671875, "completions/mean_terminated_length": 615.7022705078125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.06542650310040891, "grad_norm": 0.11376847326755524, "learning_rate": 2e-07, "loss": 0.0349, "num_tokens": 448384982.0, "reward": 0.515625, "reward_std": 0.19851204752922058, "rewards/simpleverify_reward/mean": 0.515625, "rewards/simpleverify_reward/std": 0.5000349283218384, "step": 701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2837.0, "completions/mean_length": 642.0658569335938, "completions/mean_terminated_length": 579.2670288085938, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.06551983620040949, "grad_norm": 0.11275488883256912, "learning_rate": 2e-07, "loss": 0.0182, "num_tokens": 449062753.0, "reward": 0.5959821939468384, "reward_std": 0.16165022552013397, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 702 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2977.0, "completions/mean_length": 629.65625, "completions/mean_terminated_length": 590.5327758789062, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.06561316930041008, "grad_norm": 0.1295749694108963, "learning_rate": 2e-07, "loss": 0.0332, "num_tokens": 449709277.0, "reward": 0.5379464626312256, "reward_std": 0.22770215570926666, "rewards/simpleverify_reward/mean": 0.5379464030265808, "rewards/simpleverify_reward/std": 0.4988364577293396, "step": 703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2593.0, "completions/mean_length": 600.654052734375, "completions/mean_terminated_length": 533.053466796875, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.06570650240041066, "grad_norm": 0.14281505346298218, "learning_rate": 2e-07, "loss": 0.0183, "num_tokens": 450334375.0, "reward": 0.582589328289032, "reward_std": 0.2473154067993164, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.493407279253006, "step": 704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3086.0, "completions/mean_length": 577.65625, "completions/mean_terminated_length": 525.8572998046875, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.06579983550041126, "grad_norm": 0.12469789385795593, "learning_rate": 2e-07, "loss": 0.0288, "num_tokens": 450954251.0, "reward": 0.5926339626312256, "reward_std": 0.19865332543849945, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3859.0, "completions/mean_length": 687.6339721679688, "completions/mean_terminated_length": 593.82568359375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.06589316860041183, "grad_norm": 0.11242169141769409, "learning_rate": 2e-07, "loss": 0.0516, "num_tokens": 451675195.0, "reward": 0.5290178656578064, "reward_std": 0.18783655762672424, "rewards/simpleverify_reward/mean": 0.5290178656578064, "rewards/simpleverify_reward/std": 0.49943605065345764, "step": 706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3793.0, "completions/mean_length": 653.3560791015625, "completions/mean_terminated_length": 590.762451171875, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.06598650170041241, "grad_norm": 0.1234223023056984, "learning_rate": 2e-07, "loss": 0.0517, "num_tokens": 452345338.0, "reward": 0.5792410969734192, "reward_std": 0.2166578322649002, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49395665526390076, "step": 707 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3829.0, "completions/mean_length": 581.6439819335938, "completions/mean_terminated_length": 545.9852905273438, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.066079834800413, "grad_norm": 0.12356533855199814, "learning_rate": 2e-07, "loss": 0.0397, "num_tokens": 452953995.0, "reward": 0.6004464626312256, "reward_std": 0.21365447342395782, "rewards/simpleverify_reward/mean": 0.6004464030265808, "rewards/simpleverify_reward/std": 0.49008017778396606, "step": 708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4028.0, "completions/mean_length": 621.6875, "completions/mean_terminated_length": 578.5039672851562, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.06617316790041358, "grad_norm": 0.1184450089931488, "learning_rate": 2e-07, "loss": 0.0333, "num_tokens": 453596403.0, "reward": 0.5959821939468384, "reward_std": 0.2111261636018753, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 709 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3802.0, "completions/mean_length": 605.3538208007812, "completions/mean_terminated_length": 565.9559936523438, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.06626650100041416, "grad_norm": 0.1206669881939888, "learning_rate": 2e-07, "loss": 0.0265, "num_tokens": 454229312.0, "reward": 0.6037946939468384, "reward_std": 0.1817513257265091, "rewards/simpleverify_reward/mean": 0.6037946343421936, "rewards/simpleverify_reward/std": 0.48938122391700745, "step": 710 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2873.0, "completions/mean_length": 586.8404541015625, "completions/mean_terminated_length": 559.209228515625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.06635983410041475, "grad_norm": 0.1307794600725174, "learning_rate": 2e-07, "loss": 0.0283, "num_tokens": 454845641.0, "reward": 0.5535714626312256, "reward_std": 0.21365447342395782, "rewards/simpleverify_reward/mean": 0.5535714030265808, "rewards/simpleverify_reward/std": 0.4973994791507721, "step": 711 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3757.0, "completions/mean_length": 668.8225708007812, "completions/mean_terminated_length": 614.4229125976562, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.06645316720041533, "grad_norm": 0.10852993279695511, "learning_rate": 2e-07, "loss": 0.0382, "num_tokens": 455535162.0, "reward": 0.5524553656578064, "reward_std": 0.1851348727941513, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 712 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2844.0, "completions/mean_length": 627.5100708007812, "completions/mean_terminated_length": 576.445068359375, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.06654650030041591, "grad_norm": 0.1233273521065712, "learning_rate": 2e-07, "loss": 0.0229, "num_tokens": 456185531.0, "reward": 0.582589328289032, "reward_std": 0.19869358837604523, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.493407279253006, "step": 713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3163.0, "completions/mean_length": 664.1439819335938, "completions/mean_terminated_length": 589.7935791015625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.0666398334004165, "grad_norm": 0.1234380453824997, "learning_rate": 2e-07, "loss": 0.043, "num_tokens": 456876140.0, "reward": 0.5803571939468384, "reward_std": 0.22090154886245728, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761425971985, "step": 714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3628.0, "completions/mean_length": 643.8225708007812, "completions/mean_terminated_length": 565.0056762695312, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.06673316650041708, "grad_norm": 0.12518207728862762, "learning_rate": 2e-07, "loss": 0.0224, "num_tokens": 457533501.0, "reward": 0.5647321939468384, "reward_std": 0.18329408764839172, "rewards/simpleverify_reward/mean": 0.5647321343421936, "rewards/simpleverify_reward/std": 0.49606892466545105, "step": 715 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3896.0, "completions/mean_length": 590.4029541015625, "completions/mean_terminated_length": 538.7916259765625, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.06682649960041767, "grad_norm": 0.10997849702835083, "learning_rate": 2e-07, "loss": 0.0327, "num_tokens": 458156206.0, "reward": 0.613839328289032, "reward_std": 0.15879617631435394, "rewards/simpleverify_reward/mean": 0.6138392686843872, "rewards/simpleverify_reward/std": 0.48714008927345276, "step": 716 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 1804.0, "completions/mean_length": 611.4285888671875, "completions/mean_terminated_length": 572.099365234375, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.06691983270041825, "grad_norm": 0.1273539811372757, "learning_rate": 2e-07, "loss": 0.0402, "num_tokens": 458799526.0, "reward": 0.5334821939468384, "reward_std": 0.24784743785858154, "rewards/simpleverify_reward/mean": 0.5334821343421936, "rewards/simpleverify_reward/std": 0.49915632605552673, "step": 717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2496.0, "completions/mean_length": 656.6517944335938, "completions/mean_terminated_length": 602.0589599609375, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.06701316580041883, "grad_norm": 0.12911580502986908, "learning_rate": 2e-07, "loss": 0.0452, "num_tokens": 459492478.0, "reward": 0.5167410969734192, "reward_std": 0.21925225853919983, "rewards/simpleverify_reward/mean": 0.5167410969734192, "rewards/simpleverify_reward/std": 0.4999987483024597, "step": 718 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4001.0, "completions/mean_length": 656.9598388671875, "completions/mean_terminated_length": 570.3935546875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.06710649890041942, "grad_norm": 0.11454705893993378, "learning_rate": 2e-07, "loss": 0.0264, "num_tokens": 460173610.0, "reward": 0.5892857313156128, "reward_std": 0.1958431750535965, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223822355270386, "step": 719 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3982.0, "completions/mean_length": 628.622802734375, "completions/mean_terminated_length": 581.5543212890625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.06719983200042, "grad_norm": 0.11215090751647949, "learning_rate": 2e-07, "loss": 0.0261, "num_tokens": 460816520.0, "reward": 0.6350446939468384, "reward_std": 0.17810788750648499, "rewards/simpleverify_reward/mean": 0.6350446343421936, "rewards/simpleverify_reward/std": 0.4816865026950836, "step": 720 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2586.0, "completions/mean_length": 639.8348388671875, "completions/mean_terminated_length": 580.9898071289062, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.06729316510042058, "grad_norm": 0.11230301856994629, "learning_rate": 2e-07, "loss": 0.0108, "num_tokens": 461473340.0, "reward": 0.590401828289032, "reward_std": 0.19527865946292877, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 721 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3117.0, "completions/mean_length": 681.5870971679688, "completions/mean_terminated_length": 587.6123657226562, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.06738649820042117, "grad_norm": 0.12191807478666306, "learning_rate": 2e-07, "loss": 0.0479, "num_tokens": 462173738.0, "reward": 0.5390625, "reward_std": 0.2348807156085968, "rewards/simpleverify_reward/mean": 0.5390625, "rewards/simpleverify_reward/std": 0.4987502098083496, "step": 722 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2349.0, "completions/mean_length": 596.7288208007812, "completions/mean_terminated_length": 545.2106323242188, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.06747983130042175, "grad_norm": 0.12010949105024338, "learning_rate": 2e-07, "loss": 0.0207, "num_tokens": 462804743.0, "reward": 0.5758928656578064, "reward_std": 0.18689404428005219, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49448272585868835, "step": 723 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3361.0, "completions/mean_length": 557.7589721679688, "completions/mean_terminated_length": 497.5164794921875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.06757316440042234, "grad_norm": 0.13549436628818512, "learning_rate": 2e-07, "loss": 0.0332, "num_tokens": 463403687.0, "reward": 0.5691964626312256, "reward_std": 0.199411079287529, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652488231659, "step": 724 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3475.0, "completions/mean_length": 662.6942138671875, "completions/mean_terminated_length": 600.2704467773438, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.06766649750042292, "grad_norm": 0.12309268116950989, "learning_rate": 2e-07, "loss": 0.0411, "num_tokens": 464086365.0, "reward": 0.5479910969734192, "reward_std": 0.22533707320690155, "rewards/simpleverify_reward/mean": 0.5479910969734192, "rewards/simpleverify_reward/std": 0.49796950817108154, "step": 725 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2670.0, "completions/mean_length": 610.9029541015625, "completions/mean_terminated_length": 567.5853271484375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.0677598306004235, "grad_norm": 0.11727333813905716, "learning_rate": 2e-07, "loss": 0.0432, "num_tokens": 464717934.0, "reward": 0.598214328289032, "reward_std": 0.18844275176525116, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053287506103516, "step": 726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2572.0, "completions/mean_length": 578.1283569335938, "completions/mean_terminated_length": 534.4033813476562, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.06785316370042409, "grad_norm": 0.1350460946559906, "learning_rate": 2e-07, "loss": 0.0045, "num_tokens": 465318113.0, "reward": 0.6026785969734192, "reward_std": 0.2039571851491928, "rewards/simpleverify_reward/mean": 0.6026785969734192, "rewards/simpleverify_reward/std": 0.48961687088012695, "step": 727 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 697.1428833007812, "completions/mean_terminated_length": 599.586669921875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.06794649680042467, "grad_norm": 0.11941482871770859, "learning_rate": 2e-07, "loss": 0.0503, "num_tokens": 466036505.0, "reward": 0.5145089626312256, "reward_std": 0.1820901334285736, "rewards/simpleverify_reward/mean": 0.5145089030265808, "rewards/simpleverify_reward/std": 0.5000685453414917, "step": 728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3255.0, "completions/mean_length": 655.9017944335938, "completions/mean_terminated_length": 613.1434936523438, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.06803982990042524, "grad_norm": 0.11253414303064346, "learning_rate": 2e-07, "loss": 0.0179, "num_tokens": 466708849.0, "reward": 0.535714328289032, "reward_std": 0.16980911791324615, "rewards/simpleverify_reward/mean": 0.5357142686843872, "rewards/simpleverify_reward/std": 0.4990014135837555, "step": 729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3400.0, "completions/mean_length": 626.2265625, "completions/mean_terminated_length": 587.0643310546875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.06813316300042584, "grad_norm": 0.1164517030119896, "learning_rate": 2e-07, "loss": 0.0266, "num_tokens": 467362420.0, "reward": 0.5558035969734192, "reward_std": 0.20523597300052643, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715372920036316, "step": 730 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3574.0, "completions/mean_length": 626.1506958007812, "completions/mean_terminated_length": 563.0625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.06822649610042641, "grad_norm": 0.11438339948654175, "learning_rate": 2e-07, "loss": 0.0079, "num_tokens": 468006811.0, "reward": 0.5881696939468384, "reward_std": 0.18521183729171753, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924396276473999, "step": 731 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2793.0, "completions/mean_length": 567.536865234375, "completions/mean_terminated_length": 515.5888671875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 0.06831982920042699, "grad_norm": 0.13392749428749084, "learning_rate": 2e-07, "loss": 0.0266, "num_tokens": 468597236.0, "reward": 0.660714328289032, "reward_std": 0.20395858585834503, "rewards/simpleverify_reward/mean": 0.6607142686843872, "rewards/simpleverify_reward/std": 0.4737313687801361, "step": 732 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2455.0, "completions/mean_length": 644.2433471679688, "completions/mean_terminated_length": 577.4857788085938, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.06841316230042759, "grad_norm": 0.11775097250938416, "learning_rate": 2e-07, "loss": 0.0313, "num_tokens": 469264646.0, "reward": 0.5223214626312256, "reward_std": 0.20351210236549377, "rewards/simpleverify_reward/mean": 0.5223214030265808, "rewards/simpleverify_reward/std": 0.49978047609329224, "step": 733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2775.0, "completions/mean_length": 603.1027221679688, "completions/mean_terminated_length": 563.679443359375, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.06850649540042816, "grad_norm": 0.12252161651849747, "learning_rate": 2e-07, "loss": 0.0397, "num_tokens": 469889746.0, "reward": 0.5892857313156128, "reward_std": 0.2117016762495041, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 734 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3784.0, "completions/mean_length": 620.1674194335938, "completions/mean_terminated_length": 576.9649658203125, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.06859982850042876, "grad_norm": 0.11007355898618698, "learning_rate": 2e-07, "loss": 0.0246, "num_tokens": 470531376.0, "reward": 0.5870535969734192, "reward_std": 0.18148988485336304, "rewards/simpleverify_reward/mean": 0.5870535969734192, "rewards/simpleverify_reward/std": 0.49263834953308105, "step": 735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4041.0, "completions/mean_length": 588.2935791015625, "completions/mean_terminated_length": 540.6776123046875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.06869316160042933, "grad_norm": 0.12331582605838776, "learning_rate": 2e-07, "loss": 0.0182, "num_tokens": 471149823.0, "reward": 0.5691964626312256, "reward_std": 0.19017164409160614, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652488231659, "step": 736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2331.0, "completions/mean_length": 635.8314819335938, "completions/mean_terminated_length": 588.8609008789062, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.06878649470042991, "grad_norm": 0.11511775851249695, "learning_rate": 2e-07, "loss": 0.0417, "num_tokens": 471812840.0, "reward": 0.543526828289032, "reward_std": 0.21470868587493896, "rewards/simpleverify_reward/mean": 0.5435267686843872, "rewards/simpleverify_reward/std": 0.49838000535964966, "step": 737 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3266.0, "completions/mean_length": 617.7064819335938, "completions/mean_terminated_length": 586.3704833984375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.0688798278004305, "grad_norm": 0.1193184033036232, "learning_rate": 2e-07, "loss": 0.0137, "num_tokens": 472465825.0, "reward": 0.543526828289032, "reward_std": 0.21947866678237915, "rewards/simpleverify_reward/mean": 0.5435267686843872, "rewards/simpleverify_reward/std": 0.49838000535964966, "step": 738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2759.0, "completions/mean_length": 606.09375, "completions/mean_terminated_length": 542.640869140625, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.06897316090043108, "grad_norm": 0.126976877450943, "learning_rate": 2e-07, "loss": 0.079, "num_tokens": 473096581.0, "reward": 0.6049107313156128, "reward_std": 0.2307828962802887, "rewards/simpleverify_reward/mean": 0.6049107313156128, "rewards/simpleverify_reward/std": 0.48914292454719543, "step": 739 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3405.0, "completions/mean_length": 671.0201416015625, "completions/mean_terminated_length": 580.7858276367188, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.06906649400043166, "grad_norm": 0.11680159717798233, "learning_rate": 2e-07, "loss": 0.0264, "num_tokens": 473787199.0, "reward": 0.5446428656578064, "reward_std": 0.19399915635585785, "rewards/simpleverify_reward/mean": 0.5446428656578064, "rewards/simpleverify_reward/std": 0.4982811510562897, "step": 740 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3744.0, "completions/mean_length": 617.9342041015625, "completions/mean_terminated_length": 554.6965942382812, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.06915982710043225, "grad_norm": 0.11121483892202377, "learning_rate": 2e-07, "loss": 0.0481, "num_tokens": 474427708.0, "reward": 0.5770089626312256, "reward_std": 0.1739315390586853, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099319934845, "step": 741 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4054.0, "completions/mean_length": 625.5502319335938, "completions/mean_terminated_length": 570.4637451171875, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.06925316020043283, "grad_norm": 0.1255883276462555, "learning_rate": 2e-07, "loss": 0.0508, "num_tokens": 475076033.0, "reward": 0.5892857313156128, "reward_std": 0.20357881486415863, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2484.0, "completions/mean_length": 602.3170166015625, "completions/mean_terminated_length": 558.8926391601562, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.06934649330043341, "grad_norm": 0.12415578961372375, "learning_rate": 2e-07, "loss": 0.0116, "num_tokens": 475701469.0, "reward": 0.53125, "reward_std": 0.21470683813095093, "rewards/simpleverify_reward/mean": 0.53125, "rewards/simpleverify_reward/std": 0.4993011951446533, "step": 743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3240.0, "completions/mean_length": 604.4777221679688, "completions/mean_terminated_length": 549.0567016601562, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 0.069439826400434, "grad_norm": 0.11976417154073715, "learning_rate": 2e-07, "loss": 0.0276, "num_tokens": 476336905.0, "reward": 0.590401828289032, "reward_std": 0.1725439727306366, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3873.0, "completions/mean_length": 576.138427734375, "completions/mean_terminated_length": 540.4238891601562, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.06953315950043458, "grad_norm": 0.11732464283704758, "learning_rate": 2e-07, "loss": 0.0187, "num_tokens": 476941677.0, "reward": 0.6227678656578064, "reward_std": 0.18006137013435364, "rewards/simpleverify_reward/mean": 0.6227678656578064, "rewards/simpleverify_reward/std": 0.4849644899368286, "step": 745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 604.1339721679688, "completions/mean_terminated_length": 564.7223510742188, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.06962649260043517, "grad_norm": 0.13018371164798737, "learning_rate": 2e-07, "loss": 0.0222, "num_tokens": 477577829.0, "reward": 0.5714285969734192, "reward_std": 0.20935770869255066, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514803290367126, "step": 746 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2458.0, "completions/mean_length": 576.0201416015625, "completions/mean_terminated_length": 536.2911987304688, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.06971982570043575, "grad_norm": 0.12228605151176453, "learning_rate": 2e-07, "loss": 0.0285, "num_tokens": 478178519.0, "reward": 0.652901828289032, "reward_std": 0.17739064991474152, "rewards/simpleverify_reward/mean": 0.6529017686843872, "rewards/simpleverify_reward/std": 0.47631317377090454, "step": 747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2377.0, "completions/mean_length": 643.3192138671875, "completions/mean_terminated_length": 568.5176391601562, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.06981315880043633, "grad_norm": 0.12339183688163757, "learning_rate": 2e-07, "loss": 0.0334, "num_tokens": 478851157.0, "reward": 0.5145089626312256, "reward_std": 0.20633253455162048, "rewards/simpleverify_reward/mean": 0.5145089030265808, "rewards/simpleverify_reward/std": 0.5000685453414917, "step": 748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3582.0, "completions/mean_length": 598.8460083007812, "completions/mean_terminated_length": 571.3093872070312, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.06990649190043692, "grad_norm": 0.12025352567434311, "learning_rate": 2e-07, "loss": 0.0116, "num_tokens": 479476467.0, "reward": 0.551339328289032, "reward_std": 0.18986602127552032, "rewards/simpleverify_reward/mean": 0.5513392686843872, "rewards/simpleverify_reward/std": 0.4976350665092468, "step": 749 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3909.0, "completions/mean_length": 603.6082763671875, "completions/mean_terminated_length": 556.2002563476562, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.0699998250004375, "grad_norm": 0.13030563294887543, "learning_rate": 2e-07, "loss": 0.0298, "num_tokens": 480108980.0, "reward": 0.6383928656578064, "reward_std": 0.19163449108600616, "rewards/simpleverify_reward/mean": 0.6383928656578064, "rewards/simpleverify_reward/std": 0.4807341992855072, "step": 750 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3935.0, "completions/mean_length": 704.8080444335938, "completions/mean_terminated_length": 647.0692749023438, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.07009315810043808, "grad_norm": 0.11026362329721451, "learning_rate": 2e-07, "loss": 0.0462, "num_tokens": 480829744.0, "reward": 0.5323660969734192, "reward_std": 0.17739318311214447, "rewards/simpleverify_reward/mean": 0.5323660969734192, "rewards/simpleverify_reward/std": 0.4992299973964691, "step": 751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3184.0, "completions/mean_length": 589.7879638671875, "completions/mean_terminated_length": 546.2079467773438, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.07018649120043867, "grad_norm": 0.11860298365354538, "learning_rate": 2e-07, "loss": 0.0321, "num_tokens": 481444882.0, "reward": 0.5703125, "reward_std": 0.19302275776863098, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2095.0, "completions/mean_length": 632.8314819335938, "completions/mean_terminated_length": 561.8325805664062, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.07027982430043925, "grad_norm": 0.12139637023210526, "learning_rate": 2e-07, "loss": 0.0185, "num_tokens": 482088795.0, "reward": 0.6071428656578064, "reward_std": 0.19745828211307526, "rewards/simpleverify_reward/mean": 0.6071428656578064, "rewards/simpleverify_reward/std": 0.48865827918052673, "step": 753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3413.0, "completions/mean_length": 615.4699096679688, "completions/mean_terminated_length": 576.1862182617188, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.07037315740043983, "grad_norm": 0.11385700851678848, "learning_rate": 2e-07, "loss": 0.0246, "num_tokens": 482724656.0, "reward": 0.5948660969734192, "reward_std": 0.1737881749868393, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.49119213223457336, "step": 754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3126.0, "completions/mean_length": 656.2421875, "completions/mean_terminated_length": 605.6002197265625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.07046649050044042, "grad_norm": 0.12916527688503265, "learning_rate": 2e-07, "loss": 0.0295, "num_tokens": 483403129.0, "reward": 0.5089285969734192, "reward_std": 0.2532693147659302, "rewards/simpleverify_reward/mean": 0.5089285969734192, "rewards/simpleverify_reward/std": 0.5001994967460632, "step": 755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2537.0, "completions/mean_length": 583.5324096679688, "completions/mean_terminated_length": 519.6693115234375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.070559823600441, "grad_norm": 0.12980622053146362, "learning_rate": 2e-07, "loss": 0.0264, "num_tokens": 484023910.0, "reward": 0.578125, "reward_std": 0.2224399596452713, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 756 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3338.0, "completions/mean_length": 598.8270263671875, "completions/mean_terminated_length": 563.3427124023438, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.07065315670044159, "grad_norm": 0.1261042207479477, "learning_rate": 2e-07, "loss": 0.0202, "num_tokens": 484648275.0, "reward": 0.6104910969734192, "reward_std": 0.19201244413852692, "rewards/simpleverify_reward/mean": 0.6104910969734192, "rewards/simpleverify_reward/std": 0.48791128396987915, "step": 757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3640.0, "completions/mean_length": 609.052490234375, "completions/mean_terminated_length": 533.508544921875, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.07074648980044217, "grad_norm": 0.1303604245185852, "learning_rate": 2e-07, "loss": 0.0322, "num_tokens": 485278322.0, "reward": 0.5100446939468384, "reward_std": 0.21605415642261505, "rewards/simpleverify_reward/mean": 0.5100446343421936, "rewards/simpleverify_reward/std": 0.5001782774925232, "step": 758 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3820.0, "completions/mean_length": 627.9732666015625, "completions/mean_terminated_length": 596.729736328125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.07083982290044274, "grad_norm": 0.11005319654941559, "learning_rate": 2e-07, "loss": 0.0316, "num_tokens": 485928226.0, "reward": 0.5915178656578064, "reward_std": 0.16642414033412933, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 759 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3996.0, "completions/mean_length": 601.419677734375, "completions/mean_terminated_length": 549.9705200195312, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.07093315600044334, "grad_norm": 0.13268131017684937, "learning_rate": 2e-07, "loss": 0.0208, "num_tokens": 486560050.0, "reward": 0.5881696939468384, "reward_std": 0.2231578528881073, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924395978450775, "step": 760 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3638.0, "completions/mean_length": 602.9230346679688, "completions/mean_terminated_length": 567.480224609375, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.07102648910044392, "grad_norm": 0.1121458187699318, "learning_rate": 2e-07, "loss": 0.0268, "num_tokens": 487187469.0, "reward": 0.660714328289032, "reward_std": 0.1676262468099594, "rewards/simpleverify_reward/mean": 0.6607142686843872, "rewards/simpleverify_reward/std": 0.4737313687801361, "step": 761 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2336.0, "completions/mean_length": 644.7667846679688, "completions/mean_terminated_length": 597.9174194335938, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.0711198222004445, "grad_norm": 0.10650760680437088, "learning_rate": 2e-07, "loss": 0.0151, "num_tokens": 487853324.0, "reward": 0.4977678656578064, "reward_std": 0.18637119233608246, "rewards/simpleverify_reward/mean": 0.4977678656578064, "rewards/simpleverify_reward/std": 0.5002743005752563, "step": 762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 627.2756958007812, "completions/mean_terminated_length": 603.8909912109375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.07121315530044509, "grad_norm": 0.10886607319116592, "learning_rate": 2e-07, "loss": 0.0158, "num_tokens": 488498835.0, "reward": 0.5636160969734192, "reward_std": 0.19152529537677765, "rewards/simpleverify_reward/mean": 0.5636160969734192, "rewards/simpleverify_reward/std": 0.49621346592903137, "step": 763 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3851.0, "completions/mean_length": 657.833740234375, "completions/mean_terminated_length": 591.3389892578125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.07130648840044566, "grad_norm": 0.12177766114473343, "learning_rate": 2e-07, "loss": 0.04, "num_tokens": 489172718.0, "reward": 0.527901828289032, "reward_std": 0.22811472415924072, "rewards/simpleverify_reward/mean": 0.5279017686843872, "rewards/simpleverify_reward/std": 0.49949970841407776, "step": 764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4017.0, "completions/mean_length": 629.4163208007812, "completions/mean_terminated_length": 578.37939453125, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.07139982150044624, "grad_norm": 0.1311195194721222, "learning_rate": 2e-07, "loss": 0.0465, "num_tokens": 489827323.0, "reward": 0.5457589626312256, "reward_std": 0.22894704341888428, "rewards/simpleverify_reward/mean": 0.5457589030265808, "rewards/simpleverify_reward/std": 0.4981797933578491, "step": 765 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3518.0, "completions/mean_length": 657.3136596679688, "completions/mean_terminated_length": 582.8152465820312, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.07149315460044683, "grad_norm": 0.11612444370985031, "learning_rate": 2e-07, "loss": 0.0307, "num_tokens": 490499948.0, "reward": 0.5491071939468384, "reward_std": 0.19828453660011292, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 766 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3633.0, "completions/mean_length": 587.2377319335938, "completions/mean_terminated_length": 547.6354370117188, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.07158648770044741, "grad_norm": 0.12226850539445877, "learning_rate": 2e-07, "loss": 0.0218, "num_tokens": 491115233.0, "reward": 0.5792410969734192, "reward_std": 0.16740307211875916, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49395665526390076, "step": 767 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3830.0, "completions/mean_length": 607.1428833007812, "completions/mean_terminated_length": 579.6715698242188, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.071679820800448, "grad_norm": 0.12522974610328674, "learning_rate": 2e-07, "loss": 0.0213, "num_tokens": 491754073.0, "reward": 0.5691964626312256, "reward_std": 0.20703791081905365, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652488231659, "step": 768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3138.0, "completions/mean_length": 616.6495971679688, "completions/mean_terminated_length": 565.4246826171875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.07177315390044858, "grad_norm": 0.1151333823800087, "learning_rate": 2e-07, "loss": 0.0389, "num_tokens": 492392743.0, "reward": 0.582589328289032, "reward_std": 0.18899770081043243, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.4934072494506836, "step": 769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3450.0, "completions/mean_length": 601.9520263671875, "completions/mean_terminated_length": 566.4993896484375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.07186648700044916, "grad_norm": 0.1232394352555275, "learning_rate": 2e-07, "loss": 0.0232, "num_tokens": 493031836.0, "reward": 0.6026785969734192, "reward_std": 0.18874582648277283, "rewards/simpleverify_reward/mean": 0.6026785969734192, "rewards/simpleverify_reward/std": 0.48961687088012695, "step": 770 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 523.59375, "completions/mean_terminated_length": 499.5101318359375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.07195982010044975, "grad_norm": 0.12958775460720062, "learning_rate": 2e-07, "loss": 0.0236, "num_tokens": 493583672.0, "reward": 0.6662946939468384, "reward_std": 0.18836788833141327, "rewards/simpleverify_reward/mean": 0.6662946343421936, "rewards/simpleverify_reward/std": 0.47179925441741943, "step": 771 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2260.0, "completions/mean_length": 546.9308471679688, "completions/mean_terminated_length": 514.9572143554688, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.07205315320045033, "grad_norm": 0.12408667802810669, "learning_rate": 2e-07, "loss": 0.0356, "num_tokens": 494168386.0, "reward": 0.5870535969734192, "reward_std": 0.19681887328624725, "rewards/simpleverify_reward/mean": 0.5870535969734192, "rewards/simpleverify_reward/std": 0.49263834953308105, "step": 772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2019.0, "completions/mean_length": 599.2444458007812, "completions/mean_terminated_length": 547.7633056640625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.07214648630045091, "grad_norm": 0.12748344242572784, "learning_rate": 2e-07, "loss": 0.0308, "num_tokens": 494795677.0, "reward": 0.5446428656578064, "reward_std": 0.1949748396873474, "rewards/simpleverify_reward/mean": 0.5446428656578064, "rewards/simpleverify_reward/std": 0.49828118085861206, "step": 773 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2687.0, "completions/mean_length": 600.9564819335938, "completions/mean_terminated_length": 541.4495239257812, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.0722398194004515, "grad_norm": 0.11879529803991318, "learning_rate": 2e-07, "loss": 0.0347, "num_tokens": 495427038.0, "reward": 0.5703125, "reward_std": 0.16116490960121155, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 774 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3487.0, "completions/mean_length": 665.935302734375, "completions/mean_terminated_length": 579.5949096679688, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.07233315250045208, "grad_norm": 0.10706914961338043, "learning_rate": 2e-07, "loss": 0.0202, "num_tokens": 496107556.0, "reward": 0.5290178656578064, "reward_std": 0.19069086015224457, "rewards/simpleverify_reward/mean": 0.5290178656578064, "rewards/simpleverify_reward/std": 0.49943605065345764, "step": 775 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2444.0, "completions/mean_length": 618.7533569335938, "completions/mean_terminated_length": 571.5509033203125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.07242648560045266, "grad_norm": 0.12644153833389282, "learning_rate": 2e-07, "loss": 0.0523, "num_tokens": 496756183.0, "reward": 0.5881696939468384, "reward_std": 0.20580190420150757, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924396276473999, "step": 776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3232.0, "completions/mean_length": 585.5636596679688, "completions/mean_terminated_length": 525.7945556640625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.07251981870045325, "grad_norm": 0.12675584852695465, "learning_rate": 2e-07, "loss": 0.0316, "num_tokens": 497364792.0, "reward": 0.5725446939468384, "reward_std": 0.17258603870868683, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 777 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3669.0, "completions/mean_length": 627.3671875, "completions/mean_terminated_length": 568.3098754882812, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.07261315180045383, "grad_norm": 0.11289744079113007, "learning_rate": 2e-07, "loss": 0.0316, "num_tokens": 498022313.0, "reward": 0.5792410969734192, "reward_std": 0.19756634533405304, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49395665526390076, "step": 778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2160.0, "completions/mean_length": 608.1607666015625, "completions/mean_terminated_length": 564.80908203125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.07270648490045442, "grad_norm": 0.12459919601678848, "learning_rate": 2e-07, "loss": 0.0393, "num_tokens": 498660225.0, "reward": 0.5647321939468384, "reward_std": 0.2144804447889328, "rewards/simpleverify_reward/mean": 0.5647321343421936, "rewards/simpleverify_reward/std": 0.49606895446777344, "step": 779 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3233.0, "completions/mean_length": 631.7254638671875, "completions/mean_terminated_length": 580.7225341796875, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.072799818000455, "grad_norm": 0.11307240277528763, "learning_rate": 2e-07, "loss": 0.0064, "num_tokens": 499323739.0, "reward": 0.5446428656578064, "reward_std": 0.19230598211288452, "rewards/simpleverify_reward/mean": 0.5446428656578064, "rewards/simpleverify_reward/std": 0.49828118085861206, "step": 780 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3249.0, "completions/mean_length": 623.8035888671875, "completions/mean_terminated_length": 572.6840209960938, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.07289315110045558, "grad_norm": 0.1312204748392105, "learning_rate": 2e-07, "loss": 0.0315, "num_tokens": 499974051.0, "reward": 0.574776828289032, "reward_std": 0.22048896551132202, "rewards/simpleverify_reward/mean": 0.5747767686843872, "rewards/simpleverify_reward/std": 0.49465295672416687, "step": 781 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3681.0, "completions/mean_length": 570.4910888671875, "completions/mean_terminated_length": 510.46539306640625, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.07298648420045617, "grad_norm": 0.12852296233177185, "learning_rate": 2e-07, "loss": 0.0288, "num_tokens": 500571355.0, "reward": 0.6272321939468384, "reward_std": 0.16311588883399963, "rewards/simpleverify_reward/mean": 0.6272321343421936, "rewards/simpleverify_reward/std": 0.4838111400604248, "step": 782 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 592.974365234375, "completions/mean_terminated_length": 561.41552734375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.07307981730045675, "grad_norm": 0.12924833595752716, "learning_rate": 2e-07, "loss": 0.0417, "num_tokens": 501192508.0, "reward": 0.6238839626312256, "reward_std": 0.19497695565223694, "rewards/simpleverify_reward/mean": 0.6238839030265808, "rewards/simpleverify_reward/std": 0.48468026518821716, "step": 783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3306.0, "completions/mean_length": 577.802490234375, "completions/mean_terminated_length": 526.0056762695312, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.07317315040045733, "grad_norm": 0.12458653002977371, "learning_rate": 2e-07, "loss": 0.0225, "num_tokens": 501809395.0, "reward": 0.5245535969734192, "reward_std": 0.19017024338245392, "rewards/simpleverify_reward/mean": 0.5245535969734192, "rewards/simpleverify_reward/std": 0.4996756613254547, "step": 784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2072.0, "completions/mean_length": 576.0625, "completions/mean_terminated_length": 536.3341064453125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.07326648350045792, "grad_norm": 0.13053016364574432, "learning_rate": 2e-07, "loss": 0.0265, "num_tokens": 502414315.0, "reward": 0.6540178656578064, "reward_std": 0.19952097535133362, "rewards/simpleverify_reward/mean": 0.6540178656578064, "rewards/simpleverify_reward/std": 0.4759531021118164, "step": 785 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3659.0, "completions/mean_length": 557.1295166015625, "completions/mean_terminated_length": 521.2220458984375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.0733598166004585, "grad_norm": 0.135705828666687, "learning_rate": 2e-07, "loss": 0.0321, "num_tokens": 503012279.0, "reward": 0.5803571939468384, "reward_std": 0.24059009552001953, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761425971985, "step": 786 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3672.0, "completions/mean_length": 603.8092041015625, "completions/mean_terminated_length": 556.4038696289062, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 0.07345314970045909, "grad_norm": 0.11402718722820282, "learning_rate": 2e-07, "loss": 0.0398, "num_tokens": 503642580.0, "reward": 0.6383928656578064, "reward_std": 0.1605236977338791, "rewards/simpleverify_reward/mean": 0.6383928656578064, "rewards/simpleverify_reward/std": 0.4807341694831848, "step": 787 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3629.0, "completions/mean_length": 597.3504638671875, "completions/mean_terminated_length": 549.8574829101562, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.07354648280045967, "grad_norm": 0.13678964972496033, "learning_rate": 2e-07, "loss": 0.0423, "num_tokens": 504266494.0, "reward": 0.6160714626312256, "reward_std": 0.21579134464263916, "rewards/simpleverify_reward/mean": 0.6160714030265808, "rewards/simpleverify_reward/std": 0.486612468957901, "step": 788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3841.0, "completions/mean_length": 634.6707763671875, "completions/mean_terminated_length": 563.7095947265625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.07363981590046025, "grad_norm": 0.13468696177005768, "learning_rate": 2e-07, "loss": 0.0372, "num_tokens": 504924671.0, "reward": 0.5580357313156128, "reward_std": 0.22567518055438995, "rewards/simpleverify_reward/mean": 0.5580357313156128, "rewards/simpleverify_reward/std": 0.49689778685569763, "step": 789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3887.0, "completions/mean_length": 593.5569458007812, "completions/mean_terminated_length": 550.0237426757812, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.07373314900046084, "grad_norm": 0.12357960641384125, "learning_rate": 2e-07, "loss": 0.0418, "num_tokens": 505545082.0, "reward": 0.5892857313156128, "reward_std": 0.18994158506393433, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 790 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 617.4096069335938, "completions/mean_terminated_length": 566.1959228515625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.07382648210046142, "grad_norm": 0.12113036215305328, "learning_rate": 2e-07, "loss": 0.0312, "num_tokens": 506187657.0, "reward": 0.5558035969734192, "reward_std": 0.18765361607074738, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715372920036316, "step": 791 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2558.0, "completions/mean_length": 597.4564819335938, "completions/mean_terminated_length": 537.8899536132812, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.073919815200462, "grad_norm": 0.1274089217185974, "learning_rate": 2e-07, "loss": 0.0371, "num_tokens": 506811290.0, "reward": 0.5859375, "reward_std": 0.19892433285713196, "rewards/simpleverify_reward/mean": 0.5859375, "rewards/simpleverify_reward/std": 0.4928344786167145, "step": 792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2464.0, "completions/mean_length": 596.6484375, "completions/mean_terminated_length": 545.1290893554688, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.07401314830046259, "grad_norm": 0.12256631255149841, "learning_rate": 2e-07, "loss": 0.0322, "num_tokens": 507442383.0, "reward": 0.5546875, "reward_std": 0.19846926629543304, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3245.0, "completions/mean_length": 622.0692138671875, "completions/mean_terminated_length": 562.9216918945312, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.07410648140046316, "grad_norm": 0.12403933703899384, "learning_rate": 2e-07, "loss": 0.016, "num_tokens": 508087501.0, "reward": 0.5334821939468384, "reward_std": 0.19340182840824127, "rewards/simpleverify_reward/mean": 0.5334821343421936, "rewards/simpleverify_reward/std": 0.49915632605552673, "step": 794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2113.0, "completions/mean_length": 588.3783569335938, "completions/mean_terminated_length": 540.7636108398438, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.07419981450046374, "grad_norm": 0.10811442881822586, "learning_rate": 2e-07, "loss": 0.0111, "num_tokens": 508694360.0, "reward": 0.5714285969734192, "reward_std": 0.16314908862113953, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514803290367126, "step": 795 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2421.0, "completions/mean_length": 560.138427734375, "completions/mean_terminated_length": 520.2302856445312, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.07429314760046433, "grad_norm": 0.13510894775390625, "learning_rate": 2e-07, "loss": 0.0225, "num_tokens": 509290284.0, "reward": 0.6049107313156128, "reward_std": 0.21620431542396545, "rewards/simpleverify_reward/mean": 0.6049107313156128, "rewards/simpleverify_reward/std": 0.48914292454719543, "step": 796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3752.0, "completions/mean_length": 592.9464721679688, "completions/mean_terminated_length": 541.37255859375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.07438648070046491, "grad_norm": 0.1225707158446312, "learning_rate": 2e-07, "loss": 0.0215, "num_tokens": 509914388.0, "reward": 0.5803571939468384, "reward_std": 0.1997498869895935, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761127948761, "step": 797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 1673.0, "completions/mean_length": 591.1283569335938, "completions/mean_terminated_length": 547.5650024414062, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.0744798138004655, "grad_norm": 0.12258046120405197, "learning_rate": 2e-07, "loss": 0.0289, "num_tokens": 510532663.0, "reward": 0.6160714626312256, "reward_std": 0.20921824872493744, "rewards/simpleverify_reward/mean": 0.6160714030265808, "rewards/simpleverify_reward/std": 0.486612468957901, "step": 798 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3479.0, "completions/mean_length": 624.3058471679688, "completions/mean_terminated_length": 581.15478515625, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.07457314690046608, "grad_norm": 0.12246536463499069, "learning_rate": 2e-07, "loss": 0.0266, "num_tokens": 511180857.0, "reward": 0.5558035969734192, "reward_std": 0.20883849263191223, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715372920036316, "step": 799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2195.0, "completions/mean_length": 629.0111694335938, "completions/mean_terminated_length": 577.96826171875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.07466648000046666, "grad_norm": 0.11720240116119385, "learning_rate": 2e-07, "loss": 0.0125, "num_tokens": 511840139.0, "reward": 0.590401828289032, "reward_std": 0.1993015855550766, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3681.0, "completions/mean_length": 590.1864013671875, "completions/mean_terminated_length": 538.5718994140625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.07475981310046725, "grad_norm": 0.13558317720890045, "learning_rate": 2e-07, "loss": 0.0453, "num_tokens": 512459810.0, "reward": 0.5859375, "reward_std": 0.202568918466568, "rewards/simpleverify_reward/mean": 0.5859375, "rewards/simpleverify_reward/std": 0.4928344786167145, "step": 801 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3769.0, "completions/mean_length": 642.685302734375, "completions/mean_terminated_length": 599.7626953125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.07485314620046783, "grad_norm": 0.1066189706325531, "learning_rate": 2e-07, "loss": 0.0145, "num_tokens": 513120632.0, "reward": 0.613839328289032, "reward_std": 0.16273610293865204, "rewards/simpleverify_reward/mean": 0.6138392686843872, "rewards/simpleverify_reward/std": 0.48714008927345276, "step": 802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3646.0, "completions/mean_length": 629.622802734375, "completions/mean_terminated_length": 570.6038818359375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.07494647930046841, "grad_norm": 0.11989188939332962, "learning_rate": 2e-07, "loss": 0.0262, "num_tokens": 513781214.0, "reward": 0.5245535969734192, "reward_std": 0.18629814684391022, "rewards/simpleverify_reward/mean": 0.5245535969734192, "rewards/simpleverify_reward/std": 0.4996756911277771, "step": 803 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2488.0, "completions/mean_length": 659.9207763671875, "completions/mean_terminated_length": 601.417724609375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.075039812400469, "grad_norm": 0.10909339785575867, "learning_rate": 2e-07, "loss": 0.0184, "num_tokens": 514454159.0, "reward": 0.5368303656578064, "reward_std": 0.19531185925006866, "rewards/simpleverify_reward/mean": 0.5368303656578064, "rewards/simpleverify_reward/std": 0.49892017245292664, "step": 804 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 646.8404541015625, "completions/mean_terminated_length": 560.0194091796875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.07513314550046958, "grad_norm": 0.11108136922121048, "learning_rate": 2e-07, "loss": 0.0283, "num_tokens": 515118264.0, "reward": 0.625, "reward_std": 0.1676594614982605, "rewards/simpleverify_reward/mean": 0.625, "rewards/simpleverify_reward/std": 0.48439329862594604, "step": 805 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3878.0, "completions/mean_length": 633.8817138671875, "completions/mean_terminated_length": 570.93408203125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.07522647860047016, "grad_norm": 0.11714525520801544, "learning_rate": 2e-07, "loss": 0.0562, "num_tokens": 515778662.0, "reward": 0.5647321939468384, "reward_std": 0.2156379669904709, "rewards/simpleverify_reward/mean": 0.5647321343421936, "rewards/simpleverify_reward/std": 0.49606895446777344, "step": 806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2045.0, "completions/mean_length": 652.4017944335938, "completions/mean_terminated_length": 585.802001953125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.07531981170047075, "grad_norm": 0.11080990731716156, "learning_rate": 2e-07, "loss": 0.062, "num_tokens": 516456518.0, "reward": 0.5535714626312256, "reward_std": 0.19215373694896698, "rewards/simpleverify_reward/mean": 0.5535714030265808, "rewards/simpleverify_reward/std": 0.4973994791507721, "step": 807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3872.0, "completions/mean_length": 614.4944458007812, "completions/mean_terminated_length": 583.1295166015625, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.07541314480047133, "grad_norm": 0.12295068800449371, "learning_rate": 2e-07, "loss": 0.0216, "num_tokens": 517102633.0, "reward": 0.546875, "reward_std": 0.20978237688541412, "rewards/simpleverify_reward/mean": 0.546875, "rewards/simpleverify_reward/std": 0.4980759024620056, "step": 808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3552.0, "completions/mean_length": 547.6272583007812, "completions/mean_terminated_length": 519.6873168945312, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.07550647790047192, "grad_norm": 0.1269228458404541, "learning_rate": 2e-07, "loss": 0.0341, "num_tokens": 517682371.0, "reward": 0.6674107313156128, "reward_std": 0.17336814105510712, "rewards/simpleverify_reward/mean": 0.6674107313156128, "rewards/simpleverify_reward/std": 0.47140392661094666, "step": 809 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3830.0, "completions/mean_length": 630.3158569335938, "completions/mean_terminated_length": 563.2889404296875, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.0755998110004725, "grad_norm": 0.11736507713794708, "learning_rate": 2e-07, "loss": 0.0237, "num_tokens": 518338726.0, "reward": 0.5212053656578064, "reward_std": 0.1708928793668747, "rewards/simpleverify_reward/mean": 0.5212053656578064, "rewards/simpleverify_reward/std": 0.49982914328575134, "step": 810 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2557.0, "completions/mean_length": 588.4609375, "completions/mean_terminated_length": 540.8472900390625, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.07569314410047308, "grad_norm": 0.12432559579610825, "learning_rate": 2e-07, "loss": 0.0408, "num_tokens": 518960923.0, "reward": 0.559151828289032, "reward_std": 0.19918397068977356, "rewards/simpleverify_reward/mean": 0.5591517686843872, "rewards/simpleverify_reward/std": 0.496766060590744, "step": 811 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2994.0, "completions/mean_length": 582.458740234375, "completions/mean_terminated_length": 530.73046875, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.07578647720047367, "grad_norm": 0.1243983581662178, "learning_rate": 2e-07, "loss": 0.009, "num_tokens": 519574134.0, "reward": 0.6328125, "reward_std": 0.17559124529361725, "rewards/simpleverify_reward/mean": 0.6328125, "rewards/simpleverify_reward/std": 0.48230743408203125, "step": 812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3954.0, "completions/mean_length": 696.161865234375, "completions/mean_terminated_length": 626.4613037109375, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.07587981030047425, "grad_norm": 0.11511807143688202, "learning_rate": 2e-07, "loss": 0.0527, "num_tokens": 520298415.0, "reward": 0.512276828289032, "reward_std": 0.19640658795833588, "rewards/simpleverify_reward/mean": 0.5122767686843872, "rewards/simpleverify_reward/std": 0.500128448009491, "step": 813 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 1744.0, "completions/mean_length": 568.5814819335938, "completions/mean_terminated_length": 520.697998046875, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.07597314340047483, "grad_norm": 0.13083051145076752, "learning_rate": 2e-07, "loss": 0.0169, "num_tokens": 520899104.0, "reward": 0.5915178656578064, "reward_std": 0.1943022459745407, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3951.0, "completions/mean_length": 653.2467041015625, "completions/mean_terminated_length": 594.6300048828125, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.07606647650047542, "grad_norm": 0.11205723881721497, "learning_rate": 2e-07, "loss": 0.0285, "num_tokens": 521577717.0, "reward": 0.5926339626312256, "reward_std": 0.19584135711193085, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161848425865173, "step": 815 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2497.0, "completions/mean_length": 610.4777221679688, "completions/mean_terminated_length": 571.1376953125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.076159809600476, "grad_norm": 0.14153219759464264, "learning_rate": 2e-07, "loss": 0.0345, "num_tokens": 522208953.0, "reward": 0.5859375, "reward_std": 0.22248414158821106, "rewards/simpleverify_reward/mean": 0.5859375, "rewards/simpleverify_reward/std": 0.4928344786167145, "step": 816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2141.0, "completions/mean_length": 580.1953125, "completions/mean_terminated_length": 528.4337158203125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.07625314270047658, "grad_norm": 0.12718023359775543, "learning_rate": 2e-07, "loss": 0.0346, "num_tokens": 522831104.0, "reward": 0.5892857313156128, "reward_std": 0.21781830489635468, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3860.0, "completions/mean_length": 614.1796875, "completions/mean_terminated_length": 578.8511352539062, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.07634647580047717, "grad_norm": 0.13802237808704376, "learning_rate": 2e-07, "loss": 0.02, "num_tokens": 523467881.0, "reward": 0.5837053656578064, "reward_std": 0.24427856504917145, "rewards/simpleverify_reward/mean": 0.5837053656578064, "rewards/simpleverify_reward/std": 0.49321895837783813, "step": 818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2331.0, "completions/mean_length": 593.1942138671875, "completions/mean_terminated_length": 541.6240234375, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.07643980890047775, "grad_norm": 0.11904804408550262, "learning_rate": 2e-07, "loss": 0.0057, "num_tokens": 524076151.0, "reward": 0.6484375, "reward_std": 0.20201507210731506, "rewards/simpleverify_reward/mean": 0.6484375, "rewards/simpleverify_reward/std": 0.4777248501777649, "step": 819 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3200.0, "completions/mean_length": 672.2098388671875, "completions/mean_terminated_length": 590.0388793945312, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.07653314200047834, "grad_norm": 0.13397493958473206, "learning_rate": 2e-07, "loss": 0.0382, "num_tokens": 524765923.0, "reward": 0.5658482313156128, "reward_std": 0.21376506984233856, "rewards/simpleverify_reward/mean": 0.5658482313156128, "rewards/simpleverify_reward/std": 0.49592188000679016, "step": 820 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3297.0, "completions/mean_length": 658.5223388671875, "completions/mean_terminated_length": 571.9954223632812, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.07662647510047892, "grad_norm": 0.1165715754032135, "learning_rate": 2e-07, "loss": 0.0461, "num_tokens": 525454087.0, "reward": 0.5792410969734192, "reward_std": 0.1932491660118103, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49395665526390076, "step": 821 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3805.0, "completions/mean_length": 643.3270263671875, "completions/mean_terminated_length": 564.4988403320312, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.0767198082004795, "grad_norm": 0.121103435754776, "learning_rate": 2e-07, "loss": 0.0249, "num_tokens": 526115988.0, "reward": 0.5881696939468384, "reward_std": 0.1756233274936676, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924396276473999, "step": 822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3364.0, "completions/mean_length": 608.364990234375, "completions/mean_terminated_length": 580.9032592773438, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.07681314130048009, "grad_norm": 0.12668262422084808, "learning_rate": 2e-07, "loss": 0.0158, "num_tokens": 526745971.0, "reward": 0.520089328289032, "reward_std": 0.20587536692619324, "rewards/simpleverify_reward/mean": 0.5200892686843872, "rewards/simpleverify_reward/std": 0.4998753070831299, "step": 823 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3049.0, "completions/mean_length": 599.1964721679688, "completions/mean_terminated_length": 571.6625366210938, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.07690647440048066, "grad_norm": 0.13137410581111908, "learning_rate": 2e-07, "loss": 0.0155, "num_tokens": 527367491.0, "reward": 0.5736607313156128, "reward_std": 0.21045497059822083, "rewards/simpleverify_reward/mean": 0.5736607313156128, "rewards/simpleverify_reward/std": 0.4948205351829529, "step": 824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3056.0, "completions/mean_length": 608.091552734375, "completions/mean_terminated_length": 588.5185546875, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.07699980750048124, "grad_norm": 0.11633870005607605, "learning_rate": 2e-07, "loss": 0.008, "num_tokens": 528002709.0, "reward": 0.5524553656578064, "reward_std": 0.18803158402442932, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 825 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3512.0, "completions/mean_length": 671.9096069335938, "completions/mean_terminated_length": 613.6107177734375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.07709314060048184, "grad_norm": 0.10468082875013351, "learning_rate": 2e-07, "loss": 0.0192, "num_tokens": 528692572.0, "reward": 0.5078125, "reward_std": 0.1633441150188446, "rewards/simpleverify_reward/mean": 0.5078125, "rewards/simpleverify_reward/std": 0.5002182126045227, "step": 826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4026.0, "completions/mean_length": 654.286865234375, "completions/mean_terminated_length": 619.365234375, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.07718647370048241, "grad_norm": 0.12135232239961624, "learning_rate": 2e-07, "loss": 0.0231, "num_tokens": 529374405.0, "reward": 0.4910714626312256, "reward_std": 0.21158292889595032, "rewards/simpleverify_reward/mean": 0.4910714328289032, "rewards/simpleverify_reward/std": 0.5001994967460632, "step": 827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3428.0, "completions/mean_length": 667.9754638671875, "completions/mean_terminated_length": 601.6768798828125, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.07727980680048299, "grad_norm": 0.10894346982240677, "learning_rate": 2e-07, "loss": 0.0392, "num_tokens": 530063415.0, "reward": 0.5602678656578064, "reward_std": 0.17145629227161407, "rewards/simpleverify_reward/mean": 0.5602678656578064, "rewards/simpleverify_reward/std": 0.4966317415237427, "step": 828 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3658.0, "completions/mean_length": 581.9017944335938, "completions/mean_terminated_length": 558.2112426757812, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.07737313990048358, "grad_norm": 0.14972776174545288, "learning_rate": 2e-07, "loss": 0.0374, "num_tokens": 530673511.0, "reward": 0.625, "reward_std": 0.22064054012298584, "rewards/simpleverify_reward/mean": 0.625, "rewards/simpleverify_reward/std": 0.48439329862594604, "step": 829 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3353.0, "completions/mean_length": 637.4576416015625, "completions/mean_terminated_length": 566.5535278320312, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.07746647300048416, "grad_norm": 0.1223003938794136, "learning_rate": 2e-07, "loss": 0.0385, "num_tokens": 531322585.0, "reward": 0.6261160969734192, "reward_std": 0.21004633605480194, "rewards/simpleverify_reward/mean": 0.6261160969734192, "rewards/simpleverify_reward/std": 0.48410359025001526, "step": 830 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3452.0, "completions/mean_length": 590.0145263671875, "completions/mean_terminated_length": 554.4407958984375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.07755980610048475, "grad_norm": 0.1287589967250824, "learning_rate": 2e-07, "loss": 0.0317, "num_tokens": 531936310.0, "reward": 0.598214328289032, "reward_std": 0.19276243448257446, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053287506103516, "step": 831 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3882.0, "completions/mean_length": 561.6049194335938, "completions/mean_terminated_length": 525.742919921875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.07765313920048533, "grad_norm": 0.12075383216142654, "learning_rate": 2e-07, "loss": 0.0179, "num_tokens": 532531612.0, "reward": 0.6417410969734192, "reward_std": 0.1868947595357895, "rewards/simpleverify_reward/mean": 0.6417410969734192, "rewards/simpleverify_reward/std": 0.47975659370422363, "step": 832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 612.0625, "completions/mean_terminated_length": 552.74462890625, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.07774647230048591, "grad_norm": 0.12564657628536224, "learning_rate": 2e-07, "loss": 0.0206, "num_tokens": 533168100.0, "reward": 0.6328125, "reward_std": 0.20670345425605774, "rewards/simpleverify_reward/mean": 0.6328125, "rewards/simpleverify_reward/std": 0.48230743408203125, "step": 833 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3432.0, "completions/mean_length": 663.982177734375, "completions/mean_terminated_length": 589.6282348632812, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.0778398054004865, "grad_norm": 0.10563230514526367, "learning_rate": 2e-07, "loss": 0.0407, "num_tokens": 533856684.0, "reward": 0.5033482313156128, "reward_std": 0.1699492633342743, "rewards/simpleverify_reward/mean": 0.5033482313156128, "rewards/simpleverify_reward/std": 0.5002680420875549, "step": 834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3742.0, "completions/mean_length": 605.109375, "completions/mean_terminated_length": 573.659912109375, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.07793313850048708, "grad_norm": 0.12203970551490784, "learning_rate": 2e-07, "loss": 0.0284, "num_tokens": 534495950.0, "reward": 0.5714285969734192, "reward_std": 0.18930330872535706, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514803290367126, "step": 835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1946.0, "completions/mean_length": 630.2444458007812, "completions/mean_terminated_length": 571.2361450195312, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.07802647160048766, "grad_norm": 0.1181313544511795, "learning_rate": 2e-07, "loss": 0.0361, "num_tokens": 535148369.0, "reward": 0.59375, "reward_std": 0.19655926525592804, "rewards/simpleverify_reward/mean": 0.59375, "rewards/simpleverify_reward/std": 0.4914066195487976, "step": 836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2220.0, "completions/mean_length": 596.779052734375, "completions/mean_terminated_length": 553.285888671875, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.07811980470048825, "grad_norm": 0.13063298165798187, "learning_rate": 2e-07, "loss": 0.0103, "num_tokens": 535781515.0, "reward": 0.598214328289032, "reward_std": 0.22740933299064636, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053290486335754, "step": 837 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3523.0, "completions/mean_length": 635.0100708007812, "completions/mean_terminated_length": 584.0554809570312, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.07821313780048883, "grad_norm": 0.11032333970069885, "learning_rate": 2e-07, "loss": 0.0352, "num_tokens": 536433684.0, "reward": 0.6205357313156128, "reward_std": 0.17919190227985382, "rewards/simpleverify_reward/mean": 0.6205357313156128, "rewards/simpleverify_reward/std": 0.4855247139930725, "step": 838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2105.0, "completions/mean_length": 532.7332763671875, "completions/mean_terminated_length": 512.7374267578125, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.07830647090048942, "grad_norm": 0.20574921369552612, "learning_rate": 2e-07, "loss": 0.0126, "num_tokens": 536998981.0, "reward": 0.5948660969734192, "reward_std": 0.18528853356838226, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.49119213223457336, "step": 839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3597.0, "completions/mean_length": 607.9777221679688, "completions/mean_terminated_length": 548.5902709960938, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.07839980400049, "grad_norm": 0.10836874693632126, "learning_rate": 2e-07, "loss": 0.0135, "num_tokens": 537620449.0, "reward": 0.5892857313156128, "reward_std": 0.17292053997516632, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 840 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 579.0402221679688, "completions/mean_terminated_length": 527.2615966796875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.07849313710049058, "grad_norm": 0.12810757756233215, "learning_rate": 2e-07, "loss": 0.0396, "num_tokens": 538219645.0, "reward": 0.5881696939468384, "reward_std": 0.18739217519760132, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924395978450775, "step": 841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 655.953125, "completions/mean_terminated_length": 605.306884765625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.07858647020049117, "grad_norm": 0.1129724457859993, "learning_rate": 2e-07, "loss": 0.0179, "num_tokens": 538890243.0, "reward": 0.5245535969734192, "reward_std": 0.17359568178653717, "rewards/simpleverify_reward/mean": 0.5245535969734192, "rewards/simpleverify_reward/std": 0.4996756613254547, "step": 842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3591.0, "completions/mean_length": 715.3359985351562, "completions/mean_terminated_length": 622.2901000976562, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.07867980330049175, "grad_norm": 0.10382253676652908, "learning_rate": 2e-07, "loss": 0.0322, "num_tokens": 539620840.0, "reward": 0.5647321939468384, "reward_std": 0.1716749519109726, "rewards/simpleverify_reward/mean": 0.5647321343421936, "rewards/simpleverify_reward/std": 0.49606895446777344, "step": 843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3752.0, "completions/mean_length": 577.4207763671875, "completions/mean_terminated_length": 533.68701171875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.07877313640049233, "grad_norm": 0.12644648551940918, "learning_rate": 2e-07, "loss": 0.0229, "num_tokens": 540226217.0, "reward": 0.640625, "reward_std": 0.16393931210041046, "rewards/simpleverify_reward/mean": 0.640625, "rewards/simpleverify_reward/std": 0.48008525371551514, "step": 844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3230.0, "completions/mean_length": 561.3671875, "completions/mean_terminated_length": 537.5382080078125, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.07886646950049292, "grad_norm": 0.13152748346328735, "learning_rate": 2e-07, "loss": 0.0121, "num_tokens": 540806330.0, "reward": 0.6104910969734192, "reward_std": 0.1993771642446518, "rewards/simpleverify_reward/mean": 0.6104910969734192, "rewards/simpleverify_reward/std": 0.48791125416755676, "step": 845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3167.0, "completions/mean_length": 649.8873291015625, "completions/mean_terminated_length": 591.2134399414062, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.0789598026004935, "grad_norm": 0.12455906718969345, "learning_rate": 2e-07, "loss": 0.0343, "num_tokens": 541480709.0, "reward": 0.5647321939468384, "reward_std": 0.22732238471508026, "rewards/simpleverify_reward/mean": 0.5647321343421936, "rewards/simpleverify_reward/std": 0.49606895446777344, "step": 846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2501.0, "completions/mean_length": 566.716552734375, "completions/mean_terminated_length": 538.9268798828125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.07905313570049408, "grad_norm": 0.12981991469860077, "learning_rate": 2e-07, "loss": 0.0358, "num_tokens": 542073927.0, "reward": 0.6171875, "reward_std": 0.18426933884620667, "rewards/simpleverify_reward/mean": 0.6171875, "rewards/simpleverify_reward/std": 0.4863446056842804, "step": 847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3622.0, "completions/mean_length": 606.5926513671875, "completions/mean_terminated_length": 563.2214965820312, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.07914646880049467, "grad_norm": 0.11578691750764847, "learning_rate": 2e-07, "loss": 0.0121, "num_tokens": 542704850.0, "reward": 0.578125, "reward_std": 0.17990800738334656, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2964.0, "completions/mean_length": 617.3527221679688, "completions/mean_terminated_length": 546.0364990234375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.07923980190049525, "grad_norm": 0.14289085566997528, "learning_rate": 2e-07, "loss": 0.0464, "num_tokens": 543348198.0, "reward": 0.578125, "reward_std": 0.2637366056442261, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3980.0, "completions/mean_length": 661.9944458007812, "completions/mean_terminated_length": 579.5783081054688, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.07933313500049584, "grad_norm": 0.1212274506688118, "learning_rate": 2e-07, "loss": 0.0336, "num_tokens": 544031897.0, "reward": 0.5736607313156128, "reward_std": 0.2241317480802536, "rewards/simpleverify_reward/mean": 0.5736607313156128, "rewards/simpleverify_reward/std": 0.4948205351829529, "step": 850 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 1800.0, "completions/mean_length": 582.2109375, "completions/mean_terminated_length": 530.4790649414062, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.07942646810049642, "grad_norm": 0.12489480525255203, "learning_rate": 2e-07, "loss": 0.0296, "num_tokens": 544638982.0, "reward": 0.6082589626312256, "reward_std": 0.18603530526161194, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.48841196298599243, "step": 851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2616.0, "completions/mean_length": 621.6272583007812, "completions/mean_terminated_length": 562.4722290039062, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.079519801200497, "grad_norm": 0.11748546361923218, "learning_rate": 2e-07, "loss": 0.0396, "num_tokens": 545287472.0, "reward": 0.5613839626312256, "reward_std": 0.1957223117351532, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2354.0, "completions/mean_length": 638.6451416015625, "completions/mean_terminated_length": 603.5648193359375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.07961313430049759, "grad_norm": 0.12141449004411697, "learning_rate": 2e-07, "loss": 0.0193, "num_tokens": 545954898.0, "reward": 0.5859375, "reward_std": 0.19854483008384705, "rewards/simpleverify_reward/mean": 0.5859375, "rewards/simpleverify_reward/std": 0.4928344786167145, "step": 853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 639.8326416015625, "completions/mean_terminated_length": 604.7643432617188, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.07970646740049817, "grad_norm": 0.12446518242359161, "learning_rate": 2e-07, "loss": 0.0233, "num_tokens": 546628244.0, "reward": 0.535714328289032, "reward_std": 0.21417735517024994, "rewards/simpleverify_reward/mean": 0.5357142686843872, "rewards/simpleverify_reward/std": 0.4990013837814331, "step": 854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3811.0, "completions/mean_length": 625.7980346679688, "completions/mean_terminated_length": 562.703369140625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.07979980050049874, "grad_norm": 0.11110784858465195, "learning_rate": 2e-07, "loss": 0.0179, "num_tokens": 547274879.0, "reward": 0.578125, "reward_std": 0.16435259580612183, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4036.0, "completions/mean_length": 620.6830444335938, "completions/mean_terminated_length": 577.4869995117188, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.07989313360049934, "grad_norm": 0.1300434023141861, "learning_rate": 2e-07, "loss": 0.0331, "num_tokens": 547916387.0, "reward": 0.6205357313156128, "reward_std": 0.20316554605960846, "rewards/simpleverify_reward/mean": 0.6205357313156128, "rewards/simpleverify_reward/std": 0.4855247139930725, "step": 856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3133.0, "completions/mean_length": 603.6796875, "completions/mean_terminated_length": 540.1829223632812, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.07998646670049991, "grad_norm": 0.13219599425792694, "learning_rate": 2e-07, "loss": 0.0444, "num_tokens": 548549204.0, "reward": 0.6104910969734192, "reward_std": 0.22138941287994385, "rewards/simpleverify_reward/mean": 0.6104910969734192, "rewards/simpleverify_reward/std": 0.48791125416755676, "step": 857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2522.0, "completions/mean_length": 625.2980346679688, "completions/mean_terminated_length": 574.200439453125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.08007979980050049, "grad_norm": 0.12066076695919037, "learning_rate": 2e-07, "loss": 0.0323, "num_tokens": 549189335.0, "reward": 0.6261160969734192, "reward_std": 0.19986683130264282, "rewards/simpleverify_reward/mean": 0.6261160969734192, "rewards/simpleverify_reward/std": 0.48410359025001526, "step": 858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3986.0, "completions/mean_length": 547.7846069335938, "completions/mean_terminated_length": 523.8640747070312, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.08017313290050108, "grad_norm": 0.1317836195230484, "learning_rate": 2e-07, "loss": 0.0263, "num_tokens": 549761254.0, "reward": 0.6752232313156128, "reward_std": 0.17220626771450043, "rewards/simpleverify_reward/mean": 0.6752232313156128, "rewards/simpleverify_reward/std": 0.46855294704437256, "step": 859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3983.0, "completions/mean_length": 607.107177734375, "completions/mean_terminated_length": 559.7466430664062, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.08026646600050166, "grad_norm": 0.11265739798545837, "learning_rate": 2e-07, "loss": 0.0321, "num_tokens": 550394030.0, "reward": 0.5725446939468384, "reward_std": 0.17844417691230774, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 860 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3736.0, "completions/mean_length": 638.560302734375, "completions/mean_terminated_length": 575.6976928710938, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.08035979910050225, "grad_norm": 0.1243128627538681, "learning_rate": 2e-07, "loss": 0.0247, "num_tokens": 551058356.0, "reward": 0.5345982313156128, "reward_std": 0.16999386250972748, "rewards/simpleverify_reward/mean": 0.5345982313156128, "rewards/simpleverify_reward/std": 0.4990801215171814, "step": 861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2958.0, "completions/mean_length": 598.7611694335938, "completions/mean_terminated_length": 547.27294921875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.08045313220050283, "grad_norm": 0.17591197788715363, "learning_rate": 2e-07, "loss": 0.0367, "num_tokens": 551694526.0, "reward": 0.5502232313156128, "reward_std": 0.19320820271968842, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3074.0, "completions/mean_length": 651.1842041015625, "completions/mean_terminated_length": 576.552978515625, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.08054646530050341, "grad_norm": 0.11734477430582047, "learning_rate": 2e-07, "loss": 0.0146, "num_tokens": 552371219.0, "reward": 0.5234375, "reward_std": 0.16507048904895782, "rewards/simpleverify_reward/mean": 0.5234375, "rewards/simpleverify_reward/std": 0.49972933530807495, "step": 863 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3390.0, "completions/mean_length": 599.5022583007812, "completions/mean_terminated_length": 556.04296875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.080639798400504, "grad_norm": 0.1356065571308136, "learning_rate": 2e-07, "loss": 0.0303, "num_tokens": 552990341.0, "reward": 0.5881696939468384, "reward_std": 0.21665894985198975, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924395978450775, "step": 864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3353.0, "completions/mean_length": 615.161865234375, "completions/mean_terminated_length": 555.896728515625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.08073313150050458, "grad_norm": 0.1112927570939064, "learning_rate": 2e-07, "loss": 0.0196, "num_tokens": 553637374.0, "reward": 0.5758928656578064, "reward_std": 0.17656762897968292, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49448272585868835, "step": 865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3634.0, "completions/mean_length": 583.1004638671875, "completions/mean_terminated_length": 555.4398193359375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.08082646460050516, "grad_norm": 0.11420892179012299, "learning_rate": 2e-07, "loss": 0.0179, "num_tokens": 554256520.0, "reward": 0.5792410969734192, "reward_std": 0.16686852276325226, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49395665526390076, "step": 866 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3980.0, "completions/mean_length": 596.1596069335938, "completions/mean_terminated_length": 552.6587524414062, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.08091979770050575, "grad_norm": 0.12719908356666565, "learning_rate": 2e-07, "loss": 0.023, "num_tokens": 554884623.0, "reward": 0.5457589626312256, "reward_std": 0.19978128373622894, "rewards/simpleverify_reward/mean": 0.5457589030265808, "rewards/simpleverify_reward/std": 0.4981797933578491, "step": 867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2079.0, "completions/mean_length": 639.9989013671875, "completions/mean_terminated_length": 577.1624755859375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.08101313080050633, "grad_norm": 0.13059256970882416, "learning_rate": 2e-07, "loss": 0.0209, "num_tokens": 555542542.0, "reward": 0.5580357313156128, "reward_std": 0.23660892248153687, "rewards/simpleverify_reward/mean": 0.5580357313156128, "rewards/simpleverify_reward/std": 0.49689778685569763, "step": 868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3774.0, "completions/mean_length": 595.341552734375, "completions/mean_terminated_length": 563.8040771484375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.08110646390050691, "grad_norm": 0.13839896023273468, "learning_rate": 2e-07, "loss": 0.0208, "num_tokens": 556164392.0, "reward": 0.5725446939468384, "reward_std": 0.22105170786380768, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 869 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3066.0, "completions/mean_length": 598.9620971679688, "completions/mean_terminated_length": 531.3287963867188, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.0811997970005075, "grad_norm": 0.13024349510669708, "learning_rate": 2e-07, "loss": 0.0484, "num_tokens": 556790982.0, "reward": 0.5993303656578064, "reward_std": 0.206626757979393, "rewards/simpleverify_reward/mean": 0.5993303656578064, "rewards/simpleverify_reward/std": 0.49030786752700806, "step": 870 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2851.0, "completions/mean_length": 612.1864013671875, "completions/mean_terminated_length": 564.8948364257812, "completions/min_length": 70.0, "completions/min_terminated_length": 70.0, "epoch": 0.08129313010050808, "grad_norm": 0.12317056208848953, "learning_rate": 2e-07, "loss": 0.0289, "num_tokens": 557427861.0, "reward": 0.5602678656578064, "reward_std": 0.1904287487268448, "rewards/simpleverify_reward/mean": 0.5602678656578064, "rewards/simpleverify_reward/std": 0.4966317415237427, "step": 871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3923.0, "completions/mean_length": 670.1975708007812, "completions/mean_terminated_length": 559.6878051757812, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 0.08138646320050867, "grad_norm": 0.12720882892608643, "learning_rate": 2e-07, "loss": 0.0438, "num_tokens": 558102294.0, "reward": 0.5948660969734192, "reward_std": 0.2050088495016098, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.49119213223457336, "step": 872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2510.0, "completions/mean_length": 633.5178833007812, "completions/mean_terminated_length": 586.515869140625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.08147979630050925, "grad_norm": 0.11728081852197647, "learning_rate": 2e-07, "loss": 0.0271, "num_tokens": 558759694.0, "reward": 0.5446428656578064, "reward_std": 0.2123333066701889, "rewards/simpleverify_reward/mean": 0.5446428656578064, "rewards/simpleverify_reward/std": 0.4982811510562897, "step": 873 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3975.0, "completions/mean_length": 687.0703735351562, "completions/mean_terminated_length": 621.1410522460938, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.08157312940050983, "grad_norm": 0.10851467400789261, "learning_rate": 2e-07, "loss": 0.037, "num_tokens": 559468741.0, "reward": 0.5033482313156128, "reward_std": 0.19294606149196625, "rewards/simpleverify_reward/mean": 0.5033482313156128, "rewards/simpleverify_reward/std": 0.5002680420875549, "step": 874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3249.0, "completions/mean_length": 621.0, "completions/mean_terminated_length": 557.8181762695312, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.08166646250051042, "grad_norm": 0.1176573857665062, "learning_rate": 2e-07, "loss": 0.0474, "num_tokens": 560126117.0, "reward": 0.6328125, "reward_std": 0.16288764774799347, "rewards/simpleverify_reward/mean": 0.6328125, "rewards/simpleverify_reward/std": 0.48230743408203125, "step": 875 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3588.0, "completions/mean_length": 646.3114013671875, "completions/mean_terminated_length": 571.5746459960938, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.081759795600511, "grad_norm": 0.12533675134181976, "learning_rate": 2e-07, "loss": 0.0284, "num_tokens": 560799572.0, "reward": 0.5881696939468384, "reward_std": 0.2122994214296341, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924395978450775, "step": 876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2522.0, "completions/mean_length": 626.3627319335938, "completions/mean_terminated_length": 559.2593383789062, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.08185312870051158, "grad_norm": 0.10729487240314484, "learning_rate": 2e-07, "loss": 0.0246, "num_tokens": 561446625.0, "reward": 0.578125, "reward_std": 0.16439467668533325, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2710.0, "completions/mean_length": 630.1585083007812, "completions/mean_terminated_length": 571.1487426757812, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.08194646180051217, "grad_norm": 0.12543043494224548, "learning_rate": 2e-07, "loss": 0.0257, "num_tokens": 562111047.0, "reward": 0.5792410969734192, "reward_std": 0.2420204132795334, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49395665526390076, "step": 878 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3596.0, "completions/mean_length": 678.5558471679688, "completions/mean_terminated_length": 608.4943237304688, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.08203979490051275, "grad_norm": 0.125189870595932, "learning_rate": 2e-07, "loss": 0.0403, "num_tokens": 562809569.0, "reward": 0.535714328289032, "reward_std": 0.21496787667274475, "rewards/simpleverify_reward/mean": 0.5357142686843872, "rewards/simpleverify_reward/std": 0.4990014135837555, "step": 879 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3574.0, "completions/mean_length": 564.3136596679688, "completions/mean_terminated_length": 536.5050659179688, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 0.08213312800051333, "grad_norm": 0.13293366134166718, "learning_rate": 2e-07, "loss": 0.0186, "num_tokens": 563400106.0, "reward": 0.6127232313156128, "reward_std": 0.20647591352462769, "rewards/simpleverify_reward/mean": 0.6127232313156128, "rewards/simpleverify_reward/std": 0.4873998463153839, "step": 880 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2284.0, "completions/mean_length": 688.5256958007812, "completions/mean_terminated_length": 582.65478515625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.08222646110051392, "grad_norm": 0.11477047950029373, "learning_rate": 2e-07, "loss": 0.0421, "num_tokens": 564102977.0, "reward": 0.5758928656578064, "reward_std": 0.18862931430339813, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49448275566101074, "step": 881 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2568.0, "completions/mean_length": 606.7846069335938, "completions/mean_terminated_length": 551.4002075195312, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.0823197942005145, "grad_norm": 0.1347273290157318, "learning_rate": 2e-07, "loss": 0.0244, "num_tokens": 564743360.0, "reward": 0.5770089626312256, "reward_std": 0.22624607384204865, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099617958069, "step": 882 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2855.0, "completions/mean_length": 565.0335083007812, "completions/mean_terminated_length": 525.1806030273438, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.08241312730051509, "grad_norm": 0.12454091012477875, "learning_rate": 2e-07, "loss": 0.039, "num_tokens": 565330934.0, "reward": 0.6328125, "reward_std": 0.1912222057580948, "rewards/simpleverify_reward/mean": 0.6328125, "rewards/simpleverify_reward/std": 0.48230743408203125, "step": 883 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3670.0, "completions/mean_length": 668.2756958007812, "completions/mean_terminated_length": 601.98291015625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.08250646040051567, "grad_norm": 0.0994601920247078, "learning_rate": 2e-07, "loss": 0.043, "num_tokens": 566023181.0, "reward": 0.5725446939468384, "reward_std": 0.16634424030780792, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3753.0, "completions/mean_length": 712.427490234375, "completions/mean_terminated_length": 650.907958984375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.08259979350051624, "grad_norm": 0.1057833805680275, "learning_rate": 2e-07, "loss": 0.0322, "num_tokens": 566752596.0, "reward": 0.5479910969734192, "reward_std": 0.18952901661396027, "rewards/simpleverify_reward/mean": 0.5479910969734192, "rewards/simpleverify_reward/std": 0.49796950817108154, "step": 885 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4000.0, "completions/mean_length": 689.1886596679688, "completions/mean_terminated_length": 615.3807983398438, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.08269312660051684, "grad_norm": 0.1191234439611435, "learning_rate": 2e-07, "loss": 0.0214, "num_tokens": 567468813.0, "reward": 0.5256696939468384, "reward_std": 0.20084139704704285, "rewards/simpleverify_reward/mean": 0.5256696343421936, "rewards/simpleverify_reward/std": 0.4996195137500763, "step": 886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2032.0, "completions/mean_length": 597.6317138671875, "completions/mean_terminated_length": 554.149169921875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.08278645970051741, "grad_norm": 0.13308651745319366, "learning_rate": 2e-07, "loss": 0.0137, "num_tokens": 568096363.0, "reward": 0.5691964626312256, "reward_std": 0.23521137237548828, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652488231659, "step": 887 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3666.0, "completions/mean_length": 692.9810791015625, "completions/mean_terminated_length": 611.30859375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.08287979280051799, "grad_norm": 0.11567077040672302, "learning_rate": 2e-07, "loss": 0.0289, "num_tokens": 568796138.0, "reward": 0.5479910969734192, "reward_std": 0.2223651111125946, "rewards/simpleverify_reward/mean": 0.5479910969734192, "rewards/simpleverify_reward/std": 0.49796947836875916, "step": 888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3122.0, "completions/mean_length": 597.4944458007812, "completions/mean_terminated_length": 561.99658203125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.08297312590051859, "grad_norm": 0.12271963804960251, "learning_rate": 2e-07, "loss": 0.0302, "num_tokens": 569414285.0, "reward": 0.5859375, "reward_std": 0.20534686744213104, "rewards/simpleverify_reward/mean": 0.5859375, "rewards/simpleverify_reward/std": 0.4928344786167145, "step": 889 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3947.0, "completions/mean_length": 624.9877319335938, "completions/mean_terminated_length": 573.8856201171875, "completions/min_length": 196.0, "completions/min_terminated_length": 196.0, "epoch": 0.08306645900051916, "grad_norm": 0.12427064031362534, "learning_rate": 2e-07, "loss": 0.0265, "num_tokens": 570070290.0, "reward": 0.5758928656578064, "reward_std": 0.18517157435417175, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49448272585868835, "step": 890 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3284.0, "completions/mean_length": 621.1830444335938, "completions/mean_terminated_length": 558.0045166015625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.08315979210051976, "grad_norm": 0.129943385720253, "learning_rate": 2e-07, "loss": 0.0326, "num_tokens": 570720478.0, "reward": 0.5714285969734192, "reward_std": 0.23856060206890106, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514806270599365, "step": 891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4011.0, "completions/mean_length": 626.771240234375, "completions/mean_terminated_length": 579.6776123046875, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.08325312520052033, "grad_norm": 0.1232077106833458, "learning_rate": 2e-07, "loss": 0.0379, "num_tokens": 571373113.0, "reward": 0.5814732313156128, "reward_std": 0.19227205216884613, "rewards/simpleverify_reward/mean": 0.5814732313156128, "rewards/simpleverify_reward/std": 0.4935929775238037, "step": 892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4008.0, "completions/mean_length": 655.482177734375, "completions/mean_terminated_length": 612.7186279296875, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 0.08334645830052091, "grad_norm": 0.12025871872901917, "learning_rate": 2e-07, "loss": 0.0154, "num_tokens": 572063745.0, "reward": 0.5245535969734192, "reward_std": 0.18257686495780945, "rewards/simpleverify_reward/mean": 0.5245535969734192, "rewards/simpleverify_reward/std": 0.4996756613254547, "step": 893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2820.0, "completions/mean_length": 648.4308471679688, "completions/mean_terminated_length": 577.751708984375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.0834397914005215, "grad_norm": 0.10207284241914749, "learning_rate": 2e-07, "loss": 0.0249, "num_tokens": 572745323.0, "reward": 0.551339328289032, "reward_std": 0.1701340228319168, "rewards/simpleverify_reward/mean": 0.5513392686843872, "rewards/simpleverify_reward/std": 0.4976350665092468, "step": 894 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 664.9654541015625, "completions/mean_terminated_length": 614.4518432617188, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.08353312450052208, "grad_norm": 0.11630449444055557, "learning_rate": 2e-07, "loss": 0.0119, "num_tokens": 573431876.0, "reward": 0.5613839626312256, "reward_std": 0.19723369181156158, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 895 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4002.0, "completions/mean_length": 633.1986694335938, "completions/mean_terminated_length": 562.2073364257812, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.08362645760052266, "grad_norm": 0.1229080855846405, "learning_rate": 2e-07, "loss": 0.0133, "num_tokens": 574092694.0, "reward": 0.5658482313156128, "reward_std": 0.2050512284040451, "rewards/simpleverify_reward/mean": 0.5658482313156128, "rewards/simpleverify_reward/std": 0.49592188000679016, "step": 896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3101.0, "completions/mean_length": 611.9140625, "completions/mean_terminated_length": 552.5936889648438, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.08371979070052325, "grad_norm": 0.12826503813266754, "learning_rate": 2e-07, "loss": 0.0348, "num_tokens": 574739369.0, "reward": 0.5915178656578064, "reward_std": 0.1910717785358429, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 897 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3878.0, "completions/mean_length": 650.513427734375, "completions/mean_terminated_length": 599.7870483398438, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.08381312380052383, "grad_norm": 0.10512150079011917, "learning_rate": 2e-07, "loss": 0.0159, "num_tokens": 575417733.0, "reward": 0.5323660969734192, "reward_std": 0.16622911393642426, "rewards/simpleverify_reward/mean": 0.5323660969734192, "rewards/simpleverify_reward/std": 0.4992299973964691, "step": 898 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3831.0, "completions/mean_length": 612.6685791015625, "completions/mean_terminated_length": 557.3775634765625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.08390645690052441, "grad_norm": 0.11974160373210907, "learning_rate": 2e-07, "loss": 0.0425, "num_tokens": 576048508.0, "reward": 0.598214328289032, "reward_std": 0.19306553900241852, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053287506103516, "step": 899 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2711.0, "completions/mean_length": 586.9967041015625, "completions/mean_terminated_length": 551.392333984375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.083999790000525, "grad_norm": 0.11666915565729141, "learning_rate": 2e-07, "loss": 0.0329, "num_tokens": 576666801.0, "reward": 0.559151828289032, "reward_std": 0.18524505198001862, "rewards/simpleverify_reward/mean": 0.5591517686843872, "rewards/simpleverify_reward/std": 0.496766060590744, "step": 900 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3593.0, "completions/mean_length": 627.6038208007812, "completions/mean_terminated_length": 556.4977416992188, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.08409312310052558, "grad_norm": 0.12605252861976624, "learning_rate": 2e-07, "loss": 0.0306, "num_tokens": 577319710.0, "reward": 0.566964328289032, "reward_std": 0.18127234280109406, "rewards/simpleverify_reward/mean": 0.5669642686843872, "rewards/simpleverify_reward/std": 0.49577224254608154, "step": 901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2704.0, "completions/mean_length": 633.4710083007812, "completions/mean_terminated_length": 566.505126953125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.08418645620052617, "grad_norm": 0.12339833378791809, "learning_rate": 2e-07, "loss": 0.0302, "num_tokens": 577983788.0, "reward": 0.5714285969734192, "reward_std": 0.22744254767894745, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514803290367126, "step": 902 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2814.0, "completions/mean_length": 639.3236694335938, "completions/mean_terminated_length": 592.4004516601562, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.08427978930052675, "grad_norm": 0.11727753281593323, "learning_rate": 2e-07, "loss": 0.0395, "num_tokens": 578647486.0, "reward": 0.6194196939468384, "reward_std": 0.20230677723884583, "rewards/simpleverify_reward/mean": 0.6194196343421936, "rewards/simpleverify_reward/std": 0.48580074310302734, "step": 903 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4021.0, "completions/mean_length": 599.6239013671875, "completions/mean_terminated_length": 556.1661376953125, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.08437312240052733, "grad_norm": 0.1307058483362198, "learning_rate": 2e-07, "loss": 0.0311, "num_tokens": 579266357.0, "reward": 0.6183035969734192, "reward_std": 0.20235207676887512, "rewards/simpleverify_reward/mean": 0.6183035969734192, "rewards/simpleverify_reward/std": 0.4860740303993225, "step": 904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2805.0, "completions/mean_length": 596.8359375, "completions/mean_terminated_length": 557.3419799804688, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.08446645550052792, "grad_norm": 0.12001758068799973, "learning_rate": 2e-07, "loss": 0.0218, "num_tokens": 579893186.0, "reward": 0.606026828289032, "reward_std": 0.19087563455104828, "rewards/simpleverify_reward/mean": 0.6060267686843872, "rewards/simpleverify_reward/std": 0.48890191316604614, "step": 905 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3884.0, "completions/mean_length": 678.0201416015625, "completions/mean_terminated_length": 599.9840087890625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.0845597886005285, "grad_norm": 0.11426129937171936, "learning_rate": 2e-07, "loss": 0.0381, "num_tokens": 580594116.0, "reward": 0.546875, "reward_std": 0.2202250361442566, "rewards/simpleverify_reward/mean": 0.546875, "rewards/simpleverify_reward/std": 0.4980759024620056, "step": 906 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2375.0, "completions/mean_length": 613.3314819335938, "completions/mean_terminated_length": 570.0440673828125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.08465312170052908, "grad_norm": 0.14169490337371826, "learning_rate": 2e-07, "loss": 0.0257, "num_tokens": 581238189.0, "reward": 0.6283482313156128, "reward_std": 0.21331746876239777, "rewards/simpleverify_reward/mean": 0.6283482313156128, "rewards/simpleverify_reward/std": 0.4835159480571747, "step": 907 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2985.0, "completions/mean_length": 574.8035888671875, "completions/mean_terminated_length": 539.0755004882812, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.08474645480052967, "grad_norm": 0.1437978297472, "learning_rate": 2e-07, "loss": 0.0378, "num_tokens": 581831517.0, "reward": 0.6774553656578064, "reward_std": 0.20302285254001617, "rewards/simpleverify_reward/mean": 0.6774553656578064, "rewards/simpleverify_reward/std": 0.4677111804485321, "step": 908 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3424.0, "completions/mean_length": 644.0424194335938, "completions/mean_terminated_length": 601.13671875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.08483978790053025, "grad_norm": 0.11099899560213089, "learning_rate": 2e-07, "loss": 0.0101, "num_tokens": 582487723.0, "reward": 0.6049107313156128, "reward_std": 0.19196967780590057, "rewards/simpleverify_reward/mean": 0.6049107313156128, "rewards/simpleverify_reward/std": 0.48914292454719543, "step": 909 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3240.0, "completions/mean_length": 643.6183471679688, "completions/mean_terminated_length": 608.5885009765625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.08493312100053083, "grad_norm": 0.12519104778766632, "learning_rate": 2e-07, "loss": 0.0214, "num_tokens": 583156725.0, "reward": 0.5524553656578064, "reward_std": 0.23022200167179108, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 910 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3743.0, "completions/mean_length": 622.078125, "completions/mean_terminated_length": 570.9331665039062, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.08502645410053142, "grad_norm": 0.11770468950271606, "learning_rate": 2e-07, "loss": 0.0234, "num_tokens": 583801763.0, "reward": 0.5323660969734192, "reward_std": 0.16717274487018585, "rewards/simpleverify_reward/mean": 0.5323660969734192, "rewards/simpleverify_reward/std": 0.4992299973964691, "step": 911 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4003.0, "completions/mean_length": 640.1484375, "completions/mean_terminated_length": 565.2781982421875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.085119787200532, "grad_norm": 0.10956496745347977, "learning_rate": 2e-07, "loss": 0.0303, "num_tokens": 584470536.0, "reward": 0.5625, "reward_std": 0.14853617548942566, "rewards/simpleverify_reward/mean": 0.5625, "rewards/simpleverify_reward/std": 0.49635544419288635, "step": 912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3904.0, "completions/mean_length": 640.2835083007812, "completions/mean_terminated_length": 605.2197875976562, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.08521312030053259, "grad_norm": 0.11883435398340225, "learning_rate": 2e-07, "loss": 0.0393, "num_tokens": 585148326.0, "reward": 0.5245535969734192, "reward_std": 0.20347003638744354, "rewards/simpleverify_reward/mean": 0.5245535969734192, "rewards/simpleverify_reward/std": 0.4996756613254547, "step": 913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2184.0, "completions/mean_length": 637.9967041015625, "completions/mean_terminated_length": 587.0860595703125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.08530645340053317, "grad_norm": 0.12492192536592484, "learning_rate": 2e-07, "loss": 0.0219, "num_tokens": 585805387.0, "reward": 0.5602678656578064, "reward_std": 0.2066606730222702, "rewards/simpleverify_reward/mean": 0.5602678656578064, "rewards/simpleverify_reward/std": 0.4966317415237427, "step": 914 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3263.0, "completions/mean_length": 639.3460083007812, "completions/mean_terminated_length": 592.423095703125, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.08539978650053374, "grad_norm": 0.11742501705884933, "learning_rate": 2e-07, "loss": 0.0266, "num_tokens": 586483185.0, "reward": 0.5234375, "reward_std": 0.18606990575790405, "rewards/simpleverify_reward/mean": 0.5234375, "rewards/simpleverify_reward/std": 0.49972933530807495, "step": 915 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3587.0, "completions/mean_length": 594.8192138671875, "completions/mean_terminated_length": 567.2508544921875, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.08549311960053434, "grad_norm": 0.13131794333457947, "learning_rate": 2e-07, "loss": 0.0109, "num_tokens": 587100655.0, "reward": 0.5703125, "reward_std": 0.21511802077293396, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 916 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3632.0, "completions/mean_length": 587.7042846679688, "completions/mean_terminated_length": 544.0983276367188, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.08558645270053492, "grad_norm": 0.13507968187332153, "learning_rate": 2e-07, "loss": 0.0317, "num_tokens": 587712558.0, "reward": 0.6328125, "reward_std": 0.21786609292030334, "rewards/simpleverify_reward/mean": 0.6328125, "rewards/simpleverify_reward/std": 0.48230743408203125, "step": 917 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3897.0, "completions/mean_length": 655.8292846679688, "completions/mean_terminated_length": 585.3018188476562, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.0856797858005355, "grad_norm": 0.10914458334445953, "learning_rate": 2e-07, "loss": 0.0087, "num_tokens": 588391573.0, "reward": 0.559151828289032, "reward_std": 0.1756233274936676, "rewards/simpleverify_reward/mean": 0.5591517686843872, "rewards/simpleverify_reward/std": 0.496766060590744, "step": 918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2904.0, "completions/mean_length": 622.7723388671875, "completions/mean_terminated_length": 547.525634765625, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.08577311890053609, "grad_norm": 0.12643668055534363, "learning_rate": 2e-07, "loss": 0.0209, "num_tokens": 589043913.0, "reward": 0.5758928656578064, "reward_std": 0.2141006737947464, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49448272585868835, "step": 919 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 586.8783569335938, "completions/mean_terminated_length": 559.2474975585938, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.08586645200053666, "grad_norm": 0.13099656999111176, "learning_rate": 2e-07, "loss": 0.0135, "num_tokens": 589647300.0, "reward": 0.6037946939468384, "reward_std": 0.19107134640216827, "rewards/simpleverify_reward/mean": 0.6037946343421936, "rewards/simpleverify_reward/std": 0.48938122391700745, "step": 920 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2316.0, "completions/mean_length": 585.622802734375, "completions/mean_terminated_length": 541.990966796875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.08595978510053724, "grad_norm": 0.11880455166101456, "learning_rate": 2e-07, "loss": 0.0175, "num_tokens": 590266786.0, "reward": 0.5848214626312256, "reward_std": 0.17830178141593933, "rewards/simpleverify_reward/mean": 0.5848214030265808, "rewards/simpleverify_reward/std": 0.49302801489830017, "step": 921 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3209.0, "completions/mean_length": 678.2913208007812, "completions/mean_terminated_length": 612.1922607421875, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.08605311820053783, "grad_norm": 0.10425497591495514, "learning_rate": 2e-07, "loss": 0.0202, "num_tokens": 590965367.0, "reward": 0.5457589626312256, "reward_std": 0.17438800632953644, "rewards/simpleverify_reward/mean": 0.5457589030265808, "rewards/simpleverify_reward/std": 0.4981797933578491, "step": 922 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3662.0, "completions/mean_length": 636.7277221679688, "completions/mean_terminated_length": 597.6839599609375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.08614645130053841, "grad_norm": 0.16021451354026794, "learning_rate": 2e-07, "loss": 0.0341, "num_tokens": 591622995.0, "reward": 0.578125, "reward_std": 0.19329194724559784, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 923 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3838.0, "completions/mean_length": 633.5078125, "completions/mean_terminated_length": 578.547607421875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.086239784400539, "grad_norm": 0.12459367513656616, "learning_rate": 2e-07, "loss": 0.0237, "num_tokens": 592286114.0, "reward": 0.5424107313156128, "reward_std": 0.1832943558692932, "rewards/simpleverify_reward/mean": 0.5424107313156128, "rewards/simpleverify_reward/std": 0.4984763264656067, "step": 924 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4015.0, "completions/mean_length": 659.7567138671875, "completions/mean_terminated_length": 601.2508544921875, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.08633311750053958, "grad_norm": 0.11480510234832764, "learning_rate": 2e-07, "loss": 0.0231, "num_tokens": 592961560.0, "reward": 0.5725446939468384, "reward_std": 0.20024296641349792, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4077.0, "completions/mean_length": 701.1752319335938, "completions/mean_terminated_length": 623.6677856445312, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.08642645060054016, "grad_norm": 0.11050456017255783, "learning_rate": 2e-07, "loss": 0.0157, "num_tokens": 593684957.0, "reward": 0.4966517984867096, "reward_std": 0.18509279191493988, "rewards/simpleverify_reward/mean": 0.4966517984867096, "rewards/simpleverify_reward/std": 0.5002680420875549, "step": 926 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2926.0, "completions/mean_length": 619.2355346679688, "completions/mean_terminated_length": 560.0397338867188, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.08651978370054075, "grad_norm": 0.12446035444736481, "learning_rate": 2e-07, "loss": 0.0461, "num_tokens": 594332920.0, "reward": 0.5993303656578064, "reward_std": 0.1962222456932068, "rewards/simpleverify_reward/mean": 0.5993303656578064, "rewards/simpleverify_reward/std": 0.49030786752700806, "step": 927 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2196.0, "completions/mean_length": 622.7689819335938, "completions/mean_terminated_length": 591.4786376953125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.08661311680054133, "grad_norm": 0.13111460208892822, "learning_rate": 2e-07, "loss": 0.0187, "num_tokens": 594983681.0, "reward": 0.5881696939468384, "reward_std": 0.22634738683700562, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924395978450775, "step": 928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4057.0, "completions/mean_length": 670.0078125, "completions/mean_terminated_length": 603.74853515625, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.08670644990054191, "grad_norm": 0.12250160425901413, "learning_rate": 2e-07, "loss": 0.0265, "num_tokens": 595668536.0, "reward": 0.5848214626312256, "reward_std": 0.19287008047103882, "rewards/simpleverify_reward/mean": 0.5848214030265808, "rewards/simpleverify_reward/std": 0.49302801489830017, "step": 929 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 620.3080444335938, "completions/mean_terminated_length": 577.1073608398438, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.0867997830005425, "grad_norm": 0.12641620635986328, "learning_rate": 2e-07, "loss": 0.0497, "num_tokens": 596310748.0, "reward": 0.6283482313156128, "reward_std": 0.22255942225456238, "rewards/simpleverify_reward/mean": 0.6283482313156128, "rewards/simpleverify_reward/std": 0.4835159480571747, "step": 930 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2508.0, "completions/mean_length": 569.9944458007812, "completions/mean_terminated_length": 526.1683959960938, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.08689311610054308, "grad_norm": 0.13290734589099884, "learning_rate": 2e-07, "loss": 0.0382, "num_tokens": 596906895.0, "reward": 0.621651828289032, "reward_std": 0.17946474254131317, "rewards/simpleverify_reward/mean": 0.6216517686843872, "rewards/simpleverify_reward/std": 0.485245943069458, "step": 931 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3968.0, "completions/mean_length": 684.2444458007812, "completions/mean_terminated_length": 622.2124633789062, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.08698644920054366, "grad_norm": 0.11845230311155319, "learning_rate": 2e-07, "loss": 0.0269, "num_tokens": 597611018.0, "reward": 0.5323660969734192, "reward_std": 0.202644482254982, "rewards/simpleverify_reward/mean": 0.5323660969734192, "rewards/simpleverify_reward/std": 0.4992299973964691, "step": 932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3019.0, "completions/mean_length": 625.8839721679688, "completions/mean_terminated_length": 582.7525634765625, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.08707978230054425, "grad_norm": 0.13412460684776306, "learning_rate": 2e-07, "loss": 0.028, "num_tokens": 598269506.0, "reward": 0.5613839626312256, "reward_std": 0.20639783143997192, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2821.0, "completions/mean_length": 598.943115234375, "completions/mean_terminated_length": 543.4342651367188, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.08717311540054483, "grad_norm": 0.134661465883255, "learning_rate": 2e-07, "loss": 0.0221, "num_tokens": 598884351.0, "reward": 0.6071428656578064, "reward_std": 0.2142963856458664, "rewards/simpleverify_reward/mean": 0.6071428656578064, "rewards/simpleverify_reward/std": 0.48865827918052673, "step": 934 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3487.0, "completions/mean_length": 586.536865234375, "completions/mean_terminated_length": 562.8775634765625, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.08726644850054542, "grad_norm": 0.13860218226909637, "learning_rate": 2e-07, "loss": 0.0288, "num_tokens": 599495936.0, "reward": 0.566964328289032, "reward_std": 0.2032090127468109, "rewards/simpleverify_reward/mean": 0.5669642686843872, "rewards/simpleverify_reward/std": 0.49577224254608154, "step": 935 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2141.0, "completions/mean_length": 598.4788208007812, "completions/mean_terminated_length": 551.0011596679688, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.087359781600546, "grad_norm": 0.13241828978061676, "learning_rate": 2e-07, "loss": 0.0265, "num_tokens": 600106757.0, "reward": 0.6037946939468384, "reward_std": 0.197836235165596, "rewards/simpleverify_reward/mean": 0.6037946343421936, "rewards/simpleverify_reward/std": 0.48938122391700745, "step": 936 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3774.0, "completions/mean_length": 668.1763916015625, "completions/mean_terminated_length": 605.8522338867188, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.08745311470054658, "grad_norm": 0.1126481369137764, "learning_rate": 2e-07, "loss": 0.0348, "num_tokens": 600790859.0, "reward": 0.6104910969734192, "reward_std": 0.19290512800216675, "rewards/simpleverify_reward/mean": 0.6104910969734192, "rewards/simpleverify_reward/std": 0.48791125416755676, "step": 937 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3582.0, "completions/mean_length": 631.2879638671875, "completions/mean_terminated_length": 592.182861328125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.08754644780054717, "grad_norm": 0.11109371483325958, "learning_rate": 2e-07, "loss": 0.0331, "num_tokens": 601439029.0, "reward": 0.5870535969734192, "reward_std": 0.16397280991077423, "rewards/simpleverify_reward/mean": 0.5870535969734192, "rewards/simpleverify_reward/std": 0.49263834953308105, "step": 938 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2313.0, "completions/mean_length": 591.2611694335938, "completions/mean_terminated_length": 543.685546875, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.08763978090054775, "grad_norm": 0.1236126720905304, "learning_rate": 2e-07, "loss": 0.0247, "num_tokens": 602061679.0, "reward": 0.5970982313156128, "reward_std": 0.19820715487003326, "rewards/simpleverify_reward/mean": 0.5970982313156128, "rewards/simpleverify_reward/std": 0.4907552897930145, "step": 939 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4085.0, "completions/mean_length": 597.9129638671875, "completions/mean_terminated_length": 558.43115234375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.08773311400054833, "grad_norm": 0.1315751075744629, "learning_rate": 2e-07, "loss": 0.0325, "num_tokens": 602691369.0, "reward": 0.574776828289032, "reward_std": 0.2107584923505783, "rewards/simpleverify_reward/mean": 0.5747767686843872, "rewards/simpleverify_reward/std": 0.49465295672416687, "step": 940 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3366.0, "completions/mean_length": 649.7745971679688, "completions/mean_terminated_length": 599.037353515625, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.08782644710054892, "grad_norm": 0.10528384149074554, "learning_rate": 2e-07, "loss": 0.0118, "num_tokens": 603352159.0, "reward": 0.5703125, "reward_std": 0.16424313187599182, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 941 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2359.0, "completions/mean_length": 602.7890625, "completions/mean_terminated_length": 559.37060546875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.0879197802005495, "grad_norm": 0.13004302978515625, "learning_rate": 2e-07, "loss": 0.024, "num_tokens": 603985866.0, "reward": 0.6049107313156128, "reward_std": 0.2175932675600052, "rewards/simpleverify_reward/mean": 0.6049107313156128, "rewards/simpleverify_reward/std": 0.48914292454719543, "step": 942 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3688.0, "completions/mean_length": 617.4631958007812, "completions/mean_terminated_length": 594.0123901367188, "completions/min_length": 70.0, "completions/min_terminated_length": 70.0, "epoch": 0.08801311330055009, "grad_norm": 0.13372260332107544, "learning_rate": 2e-07, "loss": 0.0399, "num_tokens": 604622681.0, "reward": 0.5736607313156128, "reward_std": 0.2342420220375061, "rewards/simpleverify_reward/mean": 0.5736607313156128, "rewards/simpleverify_reward/std": 0.4948205351829529, "step": 943 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3581.0, "completions/mean_length": 633.1361694335938, "completions/mean_terminated_length": 570.1749877929688, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.08810644640055067, "grad_norm": 0.12624193727970123, "learning_rate": 2e-07, "loss": 0.0377, "num_tokens": 605275435.0, "reward": 0.5970982313156128, "reward_std": 0.19489885866641998, "rewards/simpleverify_reward/mean": 0.5970982313156128, "rewards/simpleverify_reward/std": 0.49075525999069214, "step": 944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3282.0, "completions/mean_length": 609.4453125, "completions/mean_terminated_length": 558.1143798828125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.08819977950055125, "grad_norm": 0.12672986090183258, "learning_rate": 2e-07, "loss": 0.0335, "num_tokens": 605925762.0, "reward": 0.551339328289032, "reward_std": 0.19039805233478546, "rewards/simpleverify_reward/mean": 0.5513392686843872, "rewards/simpleverify_reward/std": 0.4976350665092468, "step": 945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3997.0, "completions/mean_length": 662.4230346679688, "completions/mean_terminated_length": 584.0308227539062, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.08829311260055184, "grad_norm": 0.11674949526786804, "learning_rate": 2e-07, "loss": 0.0246, "num_tokens": 606617749.0, "reward": 0.5290178656578064, "reward_std": 0.19727282226085663, "rewards/simpleverify_reward/mean": 0.5290178656578064, "rewards/simpleverify_reward/std": 0.49943605065345764, "step": 946 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3130.0, "completions/mean_length": 586.4754638671875, "completions/mean_terminated_length": 550.8657836914062, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.08838644570055242, "grad_norm": 0.13368940353393555, "learning_rate": 2e-07, "loss": 0.0212, "num_tokens": 607225167.0, "reward": 0.6328125, "reward_std": 0.19490139186382294, "rewards/simpleverify_reward/mean": 0.6328125, "rewards/simpleverify_reward/std": 0.48230743408203125, "step": 947 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3449.0, "completions/mean_length": 632.3348388671875, "completions/mean_terminated_length": 589.2836303710938, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.088479778800553, "grad_norm": 0.11978844553232193, "learning_rate": 2e-07, "loss": 0.0361, "num_tokens": 607886779.0, "reward": 0.5502232313156128, "reward_std": 0.18399947881698608, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 948 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3528.0, "completions/mean_length": 667.5256958007812, "completions/mean_terminated_length": 585.2423095703125, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.08857311190055359, "grad_norm": 0.13198943436145782, "learning_rate": 2e-07, "loss": 0.0097, "num_tokens": 608579674.0, "reward": 0.5636160969734192, "reward_std": 0.2023056596517563, "rewards/simpleverify_reward/mean": 0.5636160969734192, "rewards/simpleverify_reward/std": 0.49621346592903137, "step": 949 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3993.0, "completions/mean_length": 654.1283569335938, "completions/mean_terminated_length": 603.4552612304688, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 0.08866644500055416, "grad_norm": 0.12380458414554596, "learning_rate": 2e-07, "loss": 0.0299, "num_tokens": 609260397.0, "reward": 0.5022321939468384, "reward_std": 0.2134280651807785, "rewards/simpleverify_reward/mean": 0.5022321343421936, "rewards/simpleverify_reward/std": 0.5002743005752563, "step": 950 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2491.0, "completions/mean_length": 595.310302734375, "completions/mean_terminated_length": 539.7437744140625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.08875977810055474, "grad_norm": 0.142459437251091, "learning_rate": 2e-07, "loss": 0.0268, "num_tokens": 609881867.0, "reward": 0.5881696939468384, "reward_std": 0.2127547711133957, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924395978450775, "step": 951 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2801.0, "completions/mean_length": 607.6585083007812, "completions/mean_terminated_length": 536.1435546875, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.08885311120055533, "grad_norm": 0.11830049753189087, "learning_rate": 2e-07, "loss": 0.0602, "num_tokens": 610506097.0, "reward": 0.6205357313156128, "reward_std": 0.1956551969051361, "rewards/simpleverify_reward/mean": 0.6205357313156128, "rewards/simpleverify_reward/std": 0.4855247139930725, "step": 952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3201.0, "completions/mean_length": 658.0714721679688, "completions/mean_terminated_length": 583.5894775390625, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.08894644430055591, "grad_norm": 0.11163041740655899, "learning_rate": 2e-07, "loss": 0.0138, "num_tokens": 611178777.0, "reward": 0.5446428656578064, "reward_std": 0.17303183674812317, "rewards/simpleverify_reward/mean": 0.5446428656578064, "rewards/simpleverify_reward/std": 0.49828118085861206, "step": 953 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2246.0, "completions/mean_length": 627.1506958007812, "completions/mean_terminated_length": 556.0353393554688, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.0890397774005565, "grad_norm": 0.12333400547504425, "learning_rate": 2e-07, "loss": 0.0431, "num_tokens": 611827216.0, "reward": 0.6026785969734192, "reward_std": 0.21053799986839294, "rewards/simpleverify_reward/mean": 0.6026785969734192, "rewards/simpleverify_reward/std": 0.48961687088012695, "step": 954 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3193.0, "completions/mean_length": 633.208740234375, "completions/mean_terminated_length": 554.1495361328125, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 0.08913311050055708, "grad_norm": 0.12349176406860352, "learning_rate": 2e-07, "loss": 0.0421, "num_tokens": 612476571.0, "reward": 0.6272321939468384, "reward_std": 0.2089981883764267, "rewards/simpleverify_reward/mean": 0.6272321343421936, "rewards/simpleverify_reward/std": 0.4838111698627472, "step": 955 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3628.0, "completions/mean_length": 628.8850708007812, "completions/mean_terminated_length": 565.8465576171875, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.08922644360055766, "grad_norm": 0.1076839342713356, "learning_rate": 2e-07, "loss": 0.0286, "num_tokens": 613130892.0, "reward": 0.6417410969734192, "reward_std": 0.16499380767345428, "rewards/simpleverify_reward/mean": 0.6417410969734192, "rewards/simpleverify_reward/std": 0.47975656390190125, "step": 956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2701.0, "completions/mean_length": 657.421875, "completions/mean_terminated_length": 538.3025512695312, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.08931977670055825, "grad_norm": 0.12356871366500854, "learning_rate": 2e-07, "loss": 0.0295, "num_tokens": 613817222.0, "reward": 0.5546875, "reward_std": 0.1801387518644333, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 957 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 640.8527221679688, "completions/mean_terminated_length": 589.984130859375, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.08941310980055883, "grad_norm": 0.11153213679790497, "learning_rate": 2e-07, "loss": 0.0308, "num_tokens": 614487338.0, "reward": 0.5613839626312256, "reward_std": 0.1986585557460785, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 958 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3376.0, "completions/mean_length": 632.7377319335938, "completions/mean_terminated_length": 593.6489868164062, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.08950644290055941, "grad_norm": 0.13706840574741364, "learning_rate": 2e-07, "loss": 0.0366, "num_tokens": 615133095.0, "reward": 0.5714285969734192, "reward_std": 0.2272902876138687, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514806270599365, "step": 959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3542.0, "completions/mean_length": 656.6105346679688, "completions/mean_terminated_length": 625.625, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.08959977600056, "grad_norm": 0.11415804177522659, "learning_rate": 2e-07, "loss": 0.0229, "num_tokens": 615808482.0, "reward": 0.5256696939468384, "reward_std": 0.20704609155654907, "rewards/simpleverify_reward/mean": 0.5256696343421936, "rewards/simpleverify_reward/std": 0.4996195435523987, "step": 960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3526.0, "completions/mean_length": 583.7221069335938, "completions/mean_terminated_length": 536.0441284179688, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.08969310910056058, "grad_norm": 0.14194171130657196, "learning_rate": 2e-07, "loss": 0.0504, "num_tokens": 616425137.0, "reward": 0.5446428656578064, "reward_std": 0.23909121751785278, "rewards/simpleverify_reward/mean": 0.5446428656578064, "rewards/simpleverify_reward/std": 0.4982811510562897, "step": 961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3870.0, "completions/mean_length": 605.7991333007812, "completions/mean_terminated_length": 574.3558959960938, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.08978644220056116, "grad_norm": 0.11890088766813278, "learning_rate": 2e-07, "loss": 0.0326, "num_tokens": 617056621.0, "reward": 0.5926339626312256, "reward_std": 0.19215694069862366, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 962 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2255.0, "completions/mean_length": 584.6317138671875, "completions/mean_terminated_length": 540.9876098632812, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.08987977530056175, "grad_norm": 0.13491947948932648, "learning_rate": 2e-07, "loss": 0.026, "num_tokens": 617670387.0, "reward": 0.566964328289032, "reward_std": 0.2113642543554306, "rewards/simpleverify_reward/mean": 0.5669642686843872, "rewards/simpleverify_reward/std": 0.49577224254608154, "step": 963 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 1978.0, "completions/mean_length": 616.896240234375, "completions/mean_terminated_length": 569.6685791015625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.08997310840056233, "grad_norm": 0.11515174061059952, "learning_rate": 2e-07, "loss": 0.0233, "num_tokens": 618317054.0, "reward": 0.6015625, "reward_std": 0.16961409151554108, "rewards/simpleverify_reward/mean": 0.6015625, "rewards/simpleverify_reward/std": 0.48984986543655396, "step": 964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2072.0, "completions/mean_length": 588.654052734375, "completions/mean_terminated_length": 549.0677490234375, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.09006644150056292, "grad_norm": 0.11450297385454178, "learning_rate": 2e-07, "loss": 0.0166, "num_tokens": 618934320.0, "reward": 0.625, "reward_std": 0.17382095754146576, "rewards/simpleverify_reward/mean": 0.625, "rewards/simpleverify_reward/std": 0.48439329862594604, "step": 965 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3624.0, "completions/mean_length": 637.8080444335938, "completions/mean_terminated_length": 590.8642578125, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.0901597746005635, "grad_norm": 0.13015009462833405, "learning_rate": 2e-07, "loss": 0.0435, "num_tokens": 619594292.0, "reward": 0.5546875, "reward_std": 0.21173419058322906, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 966 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2816.0, "completions/mean_length": 649.8314819335938, "completions/mean_terminated_length": 603.0509033203125, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 0.09025310770056408, "grad_norm": 0.1200789138674736, "learning_rate": 2e-07, "loss": 0.014, "num_tokens": 620265221.0, "reward": 0.5401785969734192, "reward_std": 0.19745966792106628, "rewards/simpleverify_reward/mean": 0.5401785969734192, "rewards/simpleverify_reward/std": 0.49866142868995667, "step": 967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4069.0, "completions/mean_length": 566.2154541015625, "completions/mean_terminated_length": 550.3867797851562, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.09034644080056467, "grad_norm": 0.1274561733007431, "learning_rate": 2e-07, "loss": 0.0186, "num_tokens": 620859926.0, "reward": 0.6383928656578064, "reward_std": 0.1849105805158615, "rewards/simpleverify_reward/mean": 0.6383928656578064, "rewards/simpleverify_reward/std": 0.4807341694831848, "step": 968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3975.0, "completions/mean_length": 592.6796875, "completions/mean_terminated_length": 561.1182861328125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.09043977390056525, "grad_norm": 0.13210172951221466, "learning_rate": 2e-07, "loss": 0.0436, "num_tokens": 621487023.0, "reward": 0.566964328289032, "reward_std": 0.21275407075881958, "rewards/simpleverify_reward/mean": 0.5669642686843872, "rewards/simpleverify_reward/std": 0.49577224254608154, "step": 969 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3901.0, "completions/mean_length": 641.372802734375, "completions/mean_terminated_length": 610.25, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.09053310700056583, "grad_norm": 0.11699752509593964, "learning_rate": 2e-07, "loss": 0.0034, "num_tokens": 622147789.0, "reward": 0.5636160969734192, "reward_std": 0.18431143462657928, "rewards/simpleverify_reward/mean": 0.5636160969734192, "rewards/simpleverify_reward/std": 0.49621346592903137, "step": 970 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2027.0, "completions/mean_length": 638.2421875, "completions/mean_terminated_length": 583.357177734375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.09062644010056642, "grad_norm": 0.11425739526748657, "learning_rate": 2e-07, "loss": 0.0351, "num_tokens": 622814422.0, "reward": 0.5892857313156128, "reward_std": 0.18190033733844757, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 971 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3556.0, "completions/mean_length": 586.325927734375, "completions/mean_terminated_length": 562.6651611328125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.090719773200567, "grad_norm": 0.13563169538974762, "learning_rate": 2e-07, "loss": 0.0117, "num_tokens": 623430658.0, "reward": 0.6160714626312256, "reward_std": 0.20974887907505035, "rewards/simpleverify_reward/mean": 0.6160714030265808, "rewards/simpleverify_reward/std": 0.4866124987602234, "step": 972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2557.0, "completions/mean_length": 662.75, "completions/mean_terminated_length": 596.3504028320312, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.09081310630056758, "grad_norm": 0.10817781835794449, "learning_rate": 2e-07, "loss": 0.0031, "num_tokens": 624111098.0, "reward": 0.5546875, "reward_std": 0.18520157039165497, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 973 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3901.0, "completions/mean_length": 642.3214721679688, "completions/mean_terminated_length": 559.4331665039062, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.09090643940056817, "grad_norm": 0.13307307660579681, "learning_rate": 2e-07, "loss": 0.0509, "num_tokens": 624766458.0, "reward": 0.5859375, "reward_std": 0.2180490493774414, "rewards/simpleverify_reward/mean": 0.5859375, "rewards/simpleverify_reward/std": 0.4928344786167145, "step": 974 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3645.0, "completions/mean_length": 605.0100708007812, "completions/mean_terminated_length": 573.5596923828125, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.09099977250056875, "grad_norm": 0.11174090206623077, "learning_rate": 2e-07, "loss": 0.0519, "num_tokens": 625395211.0, "reward": 0.606026828289032, "reward_std": 0.17235712707042694, "rewards/simpleverify_reward/mean": 0.6060267686843872, "rewards/simpleverify_reward/std": 0.48890194296836853, "step": 975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3024.0, "completions/mean_length": 669.9207763671875, "completions/mean_terminated_length": 615.53857421875, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.09109310560056934, "grad_norm": 0.11501194536685944, "learning_rate": 2e-07, "loss": 0.0314, "num_tokens": 626083276.0, "reward": 0.5379464626312256, "reward_std": 0.207633838057518, "rewards/simpleverify_reward/mean": 0.5379464030265808, "rewards/simpleverify_reward/std": 0.4988364577293396, "step": 976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3696.0, "completions/mean_length": 684.5033569335938, "completions/mean_terminated_length": 590.6089477539062, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.09118643870056992, "grad_norm": 0.11666333675384521, "learning_rate": 2e-07, "loss": 0.0191, "num_tokens": 626799351.0, "reward": 0.5502232313156128, "reward_std": 0.21038012206554413, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3440.0, "completions/mean_length": 582.0748291015625, "completions/mean_terminated_length": 546.4204711914062, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.0912797718005705, "grad_norm": 0.13796910643577576, "learning_rate": 2e-07, "loss": 0.0315, "num_tokens": 627401306.0, "reward": 0.6261160969734192, "reward_std": 0.18919382989406586, "rewards/simpleverify_reward/mean": 0.6261160969734192, "rewards/simpleverify_reward/std": 0.48410359025001526, "step": 978 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2637.0, "completions/mean_length": 635.1674194335938, "completions/mean_terminated_length": 560.1892700195312, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.09137310490057109, "grad_norm": 0.12401998788118362, "learning_rate": 2e-07, "loss": 0.0313, "num_tokens": 628066184.0, "reward": 0.5145089626312256, "reward_std": 0.2234584391117096, "rewards/simpleverify_reward/mean": 0.5145089030265808, "rewards/simpleverify_reward/std": 0.5000685453414917, "step": 979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 635.0636596679688, "completions/mean_terminated_length": 584.10986328125, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.09146643800057166, "grad_norm": 0.1190498098731041, "learning_rate": 2e-07, "loss": 0.0304, "num_tokens": 628723977.0, "reward": 0.5993303656578064, "reward_std": 0.1808534413576126, "rewards/simpleverify_reward/mean": 0.5993303656578064, "rewards/simpleverify_reward/std": 0.49030786752700806, "step": 980 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3046.0, "completions/mean_length": 584.9710083007812, "completions/mean_terminated_length": 561.3011474609375, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.09155977110057224, "grad_norm": 0.11202730238437653, "learning_rate": 2e-07, "loss": 0.0096, "num_tokens": 629336431.0, "reward": 0.6395089626312256, "reward_std": 0.15991425514221191, "rewards/simpleverify_reward/mean": 0.6395089030265808, "rewards/simpleverify_reward/std": 0.4804111421108246, "step": 981 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3395.0, "completions/mean_length": 624.099365234375, "completions/mean_terminated_length": 588.8714599609375, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.09165310420057284, "grad_norm": 0.12144503742456436, "learning_rate": 2e-07, "loss": 0.0373, "num_tokens": 629974920.0, "reward": 0.6015625, "reward_std": 0.20575843751430511, "rewards/simpleverify_reward/mean": 0.6015625, "rewards/simpleverify_reward/std": 0.48984986543655396, "step": 982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2793.0, "completions/mean_length": 658.5067138671875, "completions/mean_terminated_length": 603.9432983398438, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.09174643730057341, "grad_norm": 0.12984918057918549, "learning_rate": 2e-07, "loss": 0.0353, "num_tokens": 630656382.0, "reward": 0.5625, "reward_std": 0.24419118463993073, "rewards/simpleverify_reward/mean": 0.5625, "rewards/simpleverify_reward/std": 0.49635544419288635, "step": 983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1992.0, "completions/mean_length": 601.3873291015625, "completions/mean_terminated_length": 569.904296875, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.09183977040057399, "grad_norm": 0.1422320455312729, "learning_rate": 2e-07, "loss": 0.0458, "num_tokens": 631274697.0, "reward": 0.5758928656578064, "reward_std": 0.2361973524093628, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49448272585868835, "step": 984 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3443.0, "completions/mean_length": 540.1886596679688, "completions/mean_terminated_length": 520.234619140625, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.09193310350057458, "grad_norm": 0.13113300502300262, "learning_rate": 2e-07, "loss": 0.0263, "num_tokens": 631854362.0, "reward": 0.5926339626312256, "reward_std": 0.18832439184188843, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2928.0, "completions/mean_length": 624.1585083007812, "completions/mean_terminated_length": 569.0498657226562, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.09202643660057516, "grad_norm": 0.11992934346199036, "learning_rate": 2e-07, "loss": 0.0284, "num_tokens": 632491760.0, "reward": 0.5970982313156128, "reward_std": 0.19625434279441833, "rewards/simpleverify_reward/mean": 0.5970982313156128, "rewards/simpleverify_reward/std": 0.4907552897930145, "step": 986 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 1909.0, "completions/mean_length": 624.8873291015625, "completions/mean_terminated_length": 577.7681274414062, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.09211976970057575, "grad_norm": 0.12080389261245728, "learning_rate": 2e-07, "loss": 0.0343, "num_tokens": 633141243.0, "reward": 0.5234375, "reward_std": 0.20557549595832825, "rewards/simpleverify_reward/mean": 0.5234375, "rewards/simpleverify_reward/std": 0.49972933530807495, "step": 987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4035.0, "completions/mean_length": 619.9642944335938, "completions/mean_terminated_length": 544.6567993164062, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.09221310280057633, "grad_norm": 0.11142529547214508, "learning_rate": 2e-07, "loss": 0.0317, "num_tokens": 633783235.0, "reward": 0.5948660969734192, "reward_std": 0.176078662276268, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.49119213223457336, "step": 988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3060.0, "completions/mean_length": 631.1283569335938, "completions/mean_terminated_length": 595.9718017578125, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.09230643590057691, "grad_norm": 0.11607910692691803, "learning_rate": 2e-07, "loss": 0.0105, "num_tokens": 634438694.0, "reward": 0.5959821939468384, "reward_std": 0.19253379106521606, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 989 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4072.0, "completions/mean_length": 591.5067138671875, "completions/mean_terminated_length": 571.8406372070312, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.0923997690005775, "grad_norm": 0.11083544790744781, "learning_rate": 2e-07, "loss": 0.015, "num_tokens": 635048124.0, "reward": 0.6473214626312256, "reward_std": 0.13876360654830933, "rewards/simpleverify_reward/mean": 0.6473214030265808, "rewards/simpleverify_reward/std": 0.47807058691978455, "step": 990 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3703.0, "completions/mean_length": 628.6964721679688, "completions/mean_terminated_length": 577.64892578125, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.09249310210057808, "grad_norm": 0.11890092492103577, "learning_rate": 2e-07, "loss": 0.0376, "num_tokens": 635708748.0, "reward": 0.5535714626312256, "reward_std": 0.19271895289421082, "rewards/simpleverify_reward/mean": 0.5535714030265808, "rewards/simpleverify_reward/std": 0.4973994791507721, "step": 991 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3920.0, "completions/mean_length": 638.0335083007812, "completions/mean_terminated_length": 555.0422973632812, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.09258643520057866, "grad_norm": 0.11926812678575516, "learning_rate": 2e-07, "loss": 0.0331, "num_tokens": 636371154.0, "reward": 0.5558035969734192, "reward_std": 0.19249282777309418, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715372920036316, "step": 992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3164.0, "completions/mean_length": 685.1217041015625, "completions/mean_terminated_length": 603.2605590820312, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.09267976830057925, "grad_norm": 0.12485386431217194, "learning_rate": 2e-07, "loss": 0.0528, "num_tokens": 637061719.0, "reward": 0.5736607313156128, "reward_std": 0.23570877313613892, "rewards/simpleverify_reward/mean": 0.5736607313156128, "rewards/simpleverify_reward/std": 0.4948205351829529, "step": 993 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3705.0, "completions/mean_length": 665.375, "completions/mean_terminated_length": 614.8674926757812, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.09277310140057983, "grad_norm": 0.10929486155509949, "learning_rate": 2e-07, "loss": 0.0323, "num_tokens": 637752159.0, "reward": 0.515625, "reward_std": 0.16995109617710114, "rewards/simpleverify_reward/mean": 0.515625, "rewards/simpleverify_reward/std": 0.5000349283218384, "step": 994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2647.0, "completions/mean_length": 569.9598388671875, "completions/mean_terminated_length": 546.1887817382812, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.09286643450058042, "grad_norm": 0.12271277606487274, "learning_rate": 2e-07, "loss": 0.0199, "num_tokens": 638342963.0, "reward": 0.6674107313156128, "reward_std": 0.16904954612255096, "rewards/simpleverify_reward/mean": 0.6674107313156128, "rewards/simpleverify_reward/std": 0.47140392661094666, "step": 995 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3931.0, "completions/mean_length": 662.53125, "completions/mean_terminated_length": 600.1045532226562, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.092959767600581, "grad_norm": 0.12525179982185364, "learning_rate": 2e-07, "loss": 0.0427, "num_tokens": 639026279.0, "reward": 0.5502232313156128, "reward_std": 0.20745022594928741, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3299.0, "completions/mean_length": 697.0535888671875, "completions/mean_terminated_length": 627.371337890625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.09305310070058158, "grad_norm": 0.104027159512043, "learning_rate": 2e-07, "loss": 0.012, "num_tokens": 639740103.0, "reward": 0.5290178656578064, "reward_std": 0.17115317285060883, "rewards/simpleverify_reward/mean": 0.5290178656578064, "rewards/simpleverify_reward/std": 0.49943602085113525, "step": 997 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2703.0, "completions/mean_length": 595.5279541015625, "completions/mean_terminated_length": 571.92919921875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.09314643380058217, "grad_norm": 0.11907639354467392, "learning_rate": 2e-07, "loss": 0.0299, "num_tokens": 640355384.0, "reward": 0.6439732313156128, "reward_std": 0.17841096222400665, "rewards/simpleverify_reward/mean": 0.6439732313156128, "rewards/simpleverify_reward/std": 0.47909072041511536, "step": 998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2464.0, "completions/mean_length": 622.5324096679688, "completions/mean_terminated_length": 567.39794921875, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.09323976690058275, "grad_norm": 0.12360893934965134, "learning_rate": 2e-07, "loss": 0.0305, "num_tokens": 641003317.0, "reward": 0.5970982313156128, "reward_std": 0.20249111950397491, "rewards/simpleverify_reward/mean": 0.5970982313156128, "rewards/simpleverify_reward/std": 0.49075525999069214, "step": 999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3392.0, "completions/mean_length": 611.4006958007812, "completions/mean_terminated_length": 568.0892944335938, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.09333310000058333, "grad_norm": 0.1330820471048355, "learning_rate": 2e-07, "loss": 0.027, "num_tokens": 641644268.0, "reward": 0.5792410969734192, "reward_std": 0.21342626214027405, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49395665526390076, "step": 1000 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 621.3381958007812, "completions/mean_terminated_length": 574.1708374023438, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 0.09342643310058392, "grad_norm": 0.1286652386188507, "learning_rate": 2e-07, "loss": 0.0295, "num_tokens": 642286187.0, "reward": 0.5703125, "reward_std": 0.2255645990371704, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 1001 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3764.0, "completions/mean_length": 634.607177734375, "completions/mean_terminated_length": 567.6632080078125, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.0935197662005845, "grad_norm": 0.12191765755414963, "learning_rate": 2e-07, "loss": 0.053, "num_tokens": 642937467.0, "reward": 0.606026828289032, "reward_std": 0.19805601239204407, "rewards/simpleverify_reward/mean": 0.6060267686843872, "rewards/simpleverify_reward/std": 0.48890194296836853, "step": 1002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3032.0, "completions/mean_length": 565.5580444335938, "completions/mean_terminated_length": 533.7522583007812, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.09361309930058508, "grad_norm": 0.13477444648742676, "learning_rate": 2e-07, "loss": 0.031, "num_tokens": 643523903.0, "reward": 0.6049107313156128, "reward_std": 0.20455309748649597, "rewards/simpleverify_reward/mean": 0.6049107313156128, "rewards/simpleverify_reward/std": 0.48914292454719543, "step": 1003 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2684.0, "completions/mean_length": 663.4319458007812, "completions/mean_terminated_length": 597.0454711914062, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.09370643240058567, "grad_norm": 0.11921285837888718, "learning_rate": 2e-07, "loss": 0.0191, "num_tokens": 644208090.0, "reward": 0.578125, "reward_std": 0.18198207020759583, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 1004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3330.0, "completions/mean_length": 604.3236694335938, "completions/mean_terminated_length": 552.9172973632812, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.09379976550058625, "grad_norm": 0.11702471226453781, "learning_rate": 2e-07, "loss": 0.0263, "num_tokens": 644840820.0, "reward": 0.6305803656578064, "reward_std": 0.1821657121181488, "rewards/simpleverify_reward/mean": 0.6305803656578064, "rewards/simpleverify_reward/std": 0.4829172194004059, "step": 1005 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3973.0, "completions/mean_length": 625.7299194335938, "completions/mean_terminated_length": 570.646240234375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.09389309860058684, "grad_norm": 0.13250352442264557, "learning_rate": 2e-07, "loss": 0.0285, "num_tokens": 645482906.0, "reward": 0.6015625, "reward_std": 0.22315673530101776, "rewards/simpleverify_reward/mean": 0.6015625, "rewards/simpleverify_reward/std": 0.48984986543655396, "step": 1006 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3911.0, "completions/mean_length": 643.203125, "completions/mean_terminated_length": 592.3692016601562, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.09398643170058742, "grad_norm": 0.11396531015634537, "learning_rate": 2e-07, "loss": 0.0199, "num_tokens": 646156024.0, "reward": 0.5412946939468384, "reward_std": 0.18028847873210907, "rewards/simpleverify_reward/mean": 0.5412946343421936, "rewards/simpleverify_reward/std": 0.49857014417648315, "step": 1007 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3634.0, "completions/mean_length": 683.5022583007812, "completions/mean_terminated_length": 593.5968017578125, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.094079764800588, "grad_norm": 0.10580184310674667, "learning_rate": 2e-07, "loss": 0.0151, "num_tokens": 646860690.0, "reward": 0.5290178656578064, "reward_std": 0.18265105783939362, "rewards/simpleverify_reward/mean": 0.5290178656578064, "rewards/simpleverify_reward/std": 0.49943605065345764, "step": 1008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3812.0, "completions/mean_length": 586.1205444335938, "completions/mean_terminated_length": 566.4242553710938, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.09417309790058859, "grad_norm": 0.127450630068779, "learning_rate": 2e-07, "loss": 0.0135, "num_tokens": 647482238.0, "reward": 0.5948660969734192, "reward_std": 0.2029040902853012, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.49119213223457336, "step": 1009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3386.0, "completions/mean_length": 603.9330444335938, "completions/mean_terminated_length": 564.5192260742188, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.09426643100058917, "grad_norm": 0.1367330402135849, "learning_rate": 2e-07, "loss": 0.0184, "num_tokens": 648113234.0, "reward": 0.609375, "reward_std": 0.22424304485321045, "rewards/simpleverify_reward/mean": 0.609375, "rewards/simpleverify_reward/std": 0.48816296458244324, "step": 1010 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2879.0, "completions/mean_length": 618.6517944335938, "completions/mean_terminated_length": 563.455810546875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.09435976410058974, "grad_norm": 0.13595306873321533, "learning_rate": 2e-07, "loss": 0.0212, "num_tokens": 648757058.0, "reward": 0.629464328289032, "reward_std": 0.22206905484199524, "rewards/simpleverify_reward/mean": 0.6294642686843872, "rewards/simpleverify_reward/std": 0.4832179844379425, "step": 1011 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2513.0, "completions/mean_length": 549.5703125, "completions/mean_terminated_length": 505.49041748046875, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.09445309720059034, "grad_norm": 0.123261958360672, "learning_rate": 2e-07, "loss": 0.0214, "num_tokens": 649339001.0, "reward": 0.6506696939468384, "reward_std": 0.15236690640449524, "rewards/simpleverify_reward/mean": 0.6506696343421936, "rewards/simpleverify_reward/std": 0.47702476382255554, "step": 1012 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3082.0, "completions/mean_length": 605.7511596679688, "completions/mean_terminated_length": 546.3258056640625, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.09454643030059091, "grad_norm": 0.12355910986661911, "learning_rate": 2e-07, "loss": 0.0232, "num_tokens": 649978906.0, "reward": 0.5602678656578064, "reward_std": 0.1699945628643036, "rewards/simpleverify_reward/mean": 0.5602678656578064, "rewards/simpleverify_reward/std": 0.4966317415237427, "step": 1013 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3984.0, "completions/mean_length": 620.5625, "completions/mean_terminated_length": 581.3363647460938, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.09463976340059149, "grad_norm": 0.11466887593269348, "learning_rate": 2e-07, "loss": 0.02, "num_tokens": 650618810.0, "reward": 0.5736607313156128, "reward_std": 0.1731494516134262, "rewards/simpleverify_reward/mean": 0.5736607313156128, "rewards/simpleverify_reward/std": 0.4948205351829529, "step": 1014 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3759.0, "completions/mean_length": 658.8203125, "completions/mean_terminated_length": 592.3446655273438, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 0.09473309650059208, "grad_norm": 0.11998791247606277, "learning_rate": 2e-07, "loss": 0.0328, "num_tokens": 651301657.0, "reward": 0.5412946939468384, "reward_std": 0.20632115006446838, "rewards/simpleverify_reward/mean": 0.5412946343421936, "rewards/simpleverify_reward/std": 0.49857014417648315, "step": 1015 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3689.0, "completions/mean_length": 640.654052734375, "completions/mean_terminated_length": 573.8270874023438, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.09482642960059266, "grad_norm": 0.12132739275693893, "learning_rate": 2e-07, "loss": 0.052, "num_tokens": 651962819.0, "reward": 0.5959821939468384, "reward_std": 0.19107064604759216, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 1016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3808.0, "completions/mean_length": 595.2154541015625, "completions/mean_terminated_length": 571.6146240234375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.09491976270059325, "grad_norm": 0.1227186992764473, "learning_rate": 2e-07, "loss": 0.0212, "num_tokens": 652593780.0, "reward": 0.5892857313156128, "reward_std": 0.18306542932987213, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 1017 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2466.0, "completions/mean_length": 617.6082763671875, "completions/mean_terminated_length": 574.3740234375, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.09501309580059383, "grad_norm": 0.12965670228004456, "learning_rate": 2e-07, "loss": 0.0242, "num_tokens": 653238829.0, "reward": 0.566964328289032, "reward_std": 0.20369867980480194, "rewards/simpleverify_reward/mean": 0.5669642686843872, "rewards/simpleverify_reward/std": 0.49577224254608154, "step": 1018 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 1998.0, "completions/mean_length": 549.0614013671875, "completions/mean_terminated_length": 509.0282287597656, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.09510642890059441, "grad_norm": 0.11110513657331467, "learning_rate": 2e-07, "loss": 0.0147, "num_tokens": 653820844.0, "reward": 0.6238839626312256, "reward_std": 0.11524433642625809, "rewards/simpleverify_reward/mean": 0.6238839030265808, "rewards/simpleverify_reward/std": 0.48468026518821716, "step": 1019 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2782.0, "completions/mean_length": 579.3203125, "completions/mean_terminated_length": 551.6299438476562, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.095199762000595, "grad_norm": 0.13469479978084564, "learning_rate": 2e-07, "loss": 0.0121, "num_tokens": 654423019.0, "reward": 0.6082589626312256, "reward_std": 0.19291538000106812, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.48841196298599243, "step": 1020 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3890.0, "completions/mean_length": 594.6328125, "completions/mean_terminated_length": 547.1029663085938, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 0.09529309510059558, "grad_norm": 0.12973082065582275, "learning_rate": 2e-07, "loss": 0.0202, "num_tokens": 655040018.0, "reward": 0.629464328289032, "reward_std": 0.18277230858802795, "rewards/simpleverify_reward/mean": 0.6294642686843872, "rewards/simpleverify_reward/std": 0.4832179844379425, "step": 1021 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4013.0, "completions/mean_length": 594.2667846679688, "completions/mean_terminated_length": 562.7196044921875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.09538642820059616, "grad_norm": 0.13710083067417145, "learning_rate": 2e-07, "loss": 0.03, "num_tokens": 655661401.0, "reward": 0.559151828289032, "reward_std": 0.217029869556427, "rewards/simpleverify_reward/mean": 0.5591517686843872, "rewards/simpleverify_reward/std": 0.496766060590744, "step": 1022 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2775.0, "completions/mean_length": 623.0457763671875, "completions/mean_terminated_length": 563.9149169921875, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.09547976130059675, "grad_norm": 0.10842672735452652, "learning_rate": 2e-07, "loss": 0.0328, "num_tokens": 656309714.0, "reward": 0.5502232313156128, "reward_std": 0.18437673151493073, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 1023 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2191.0, "completions/mean_length": 570.1339721679688, "completions/mean_terminated_length": 538.369384765625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.09557309440059733, "grad_norm": 0.12191404402256012, "learning_rate": 2e-07, "loss": 0.0198, "num_tokens": 656904842.0, "reward": 0.6830357313156128, "reward_std": 0.16886408627033234, "rewards/simpleverify_reward/mean": 0.6830357313156128, "rewards/simpleverify_reward/std": 0.46555325388908386, "step": 1024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3358.0, "completions/mean_length": 720.4810791015625, "completions/mean_terminated_length": 635.513671875, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.09566642750059791, "grad_norm": 0.11775187402963638, "learning_rate": 2e-07, "loss": 0.0341, "num_tokens": 657635001.0, "reward": 0.5111607313156128, "reward_std": 0.20354531705379486, "rewards/simpleverify_reward/mean": 0.5111607313156128, "rewards/simpleverify_reward/std": 0.5001546144485474, "step": 1025 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3934.0, "completions/mean_length": 573.0123291015625, "completions/mean_terminated_length": 537.2660522460938, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.0957597606005985, "grad_norm": 0.12729689478874207, "learning_rate": 2e-07, "loss": 0.0117, "num_tokens": 658235692.0, "reward": 0.6104910969734192, "reward_std": 0.19152529537677765, "rewards/simpleverify_reward/mean": 0.6104910969734192, "rewards/simpleverify_reward/std": 0.48791128396987915, "step": 1026 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4041.0, "completions/mean_length": 656.7645263671875, "completions/mean_terminated_length": 590.2491455078125, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.09585309370059908, "grad_norm": 0.111874520778656, "learning_rate": 2e-07, "loss": 0.015, "num_tokens": 658915609.0, "reward": 0.5636160969734192, "reward_std": 0.1692357212305069, "rewards/simpleverify_reward/mean": 0.5636160969734192, "rewards/simpleverify_reward/std": 0.49621346592903137, "step": 1027 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2213.0, "completions/mean_length": 681.591552734375, "completions/mean_terminated_length": 619.5113525390625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.09594642680059967, "grad_norm": 0.1226932629942894, "learning_rate": 2e-07, "loss": 0.038, "num_tokens": 659624851.0, "reward": 0.5301339626312256, "reward_std": 0.20369574427604675, "rewards/simpleverify_reward/mean": 0.5301339030265808, "rewards/simpleverify_reward/std": 0.49936988949775696, "step": 1028 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4013.0, "completions/mean_length": 631.8917846679688, "completions/mean_terminated_length": 552.802490234375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.09603975990060025, "grad_norm": 0.12296514958143234, "learning_rate": 2e-07, "loss": 0.0468, "num_tokens": 660286338.0, "reward": 0.6305803656578064, "reward_std": 0.1835186630487442, "rewards/simpleverify_reward/mean": 0.6305803656578064, "rewards/simpleverify_reward/std": 0.4829172194004059, "step": 1029 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2075.0, "completions/mean_length": 643.9732666015625, "completions/mean_terminated_length": 608.9469604492188, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.09613309300060083, "grad_norm": 0.11046497523784637, "learning_rate": 2e-07, "loss": 0.0338, "num_tokens": 660959210.0, "reward": 0.546875, "reward_std": 0.19756704568862915, "rewards/simpleverify_reward/mean": 0.546875, "rewards/simpleverify_reward/std": 0.4980759024620056, "step": 1030 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3403.0, "completions/mean_length": 597.075927734375, "completions/mean_terminated_length": 541.5374145507812, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.09622642610060142, "grad_norm": 0.126900777220726, "learning_rate": 2e-07, "loss": 0.0358, "num_tokens": 661586222.0, "reward": 0.6261160969734192, "reward_std": 0.20140664279460907, "rewards/simpleverify_reward/mean": 0.6261160969734192, "rewards/simpleverify_reward/std": 0.48410359025001526, "step": 1031 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 1645.0, "completions/mean_length": 635.114990234375, "completions/mean_terminated_length": 568.1808471679688, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.096319759200602, "grad_norm": 0.11477286368608475, "learning_rate": 2e-07, "loss": 0.0145, "num_tokens": 662238821.0, "reward": 0.590401828289032, "reward_std": 0.17446330189704895, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 1032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3641.0, "completions/mean_length": 657.0011596679688, "completions/mean_terminated_length": 610.31787109375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.09641309230060258, "grad_norm": 0.1305626779794693, "learning_rate": 2e-07, "loss": 0.0305, "num_tokens": 662926750.0, "reward": 0.4720982313156128, "reward_std": 0.2193303406238556, "rewards/simpleverify_reward/mean": 0.4720982015132904, "rewards/simpleverify_reward/std": 0.49949970841407776, "step": 1033 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3784.0, "completions/mean_length": 588.4096069335938, "completions/mean_terminated_length": 544.8124389648438, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.09650642540060317, "grad_norm": 0.14699125289916992, "learning_rate": 2e-07, "loss": 0.0399, "num_tokens": 663542645.0, "reward": 0.578125, "reward_std": 0.23169443011283875, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 1034 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3083.0, "completions/mean_length": 658.1942138671875, "completions/mean_terminated_length": 599.6618041992188, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.09659975850060375, "grad_norm": 0.11970791965723038, "learning_rate": 2e-07, "loss": 0.0202, "num_tokens": 664234483.0, "reward": 0.5066964626312256, "reward_std": 0.1930234581232071, "rewards/simpleverify_reward/mean": 0.5066964030265808, "rewards/simpleverify_reward/std": 0.5002344250679016, "step": 1035 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3418.0, "completions/mean_length": 589.5234375, "completions/mean_terminated_length": 561.9133911132812, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.09669309160060433, "grad_norm": 0.1229545846581459, "learning_rate": 2e-07, "loss": 0.0224, "num_tokens": 664846064.0, "reward": 0.6439732313156128, "reward_std": 0.19144609570503235, "rewards/simpleverify_reward/mean": 0.6439732313156128, "rewards/simpleverify_reward/std": 0.47909072041511536, "step": 1036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3857.0, "completions/mean_length": 650.833740234375, "completions/mean_terminated_length": 584.20361328125, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.09678642470060492, "grad_norm": 0.12406642735004425, "learning_rate": 2e-07, "loss": 0.0326, "num_tokens": 665533419.0, "reward": 0.5580357313156128, "reward_std": 0.19938038289546967, "rewards/simpleverify_reward/mean": 0.5580357313156128, "rewards/simpleverify_reward/std": 0.49689781665802, "step": 1037 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4067.0, "completions/mean_length": 630.28125, "completions/mean_terminated_length": 575.2698364257812, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.0968797578006055, "grad_norm": 0.12684266269207, "learning_rate": 2e-07, "loss": 0.0372, "num_tokens": 666181791.0, "reward": 0.5558035969734192, "reward_std": 0.18535451591014862, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715369939804077, "step": 1038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3430.0, "completions/mean_length": 580.1373291015625, "completions/mean_terminated_length": 560.4074096679688, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.09697309090060609, "grad_norm": 0.12466080486774445, "learning_rate": 2e-07, "loss": 0.0251, "num_tokens": 666794522.0, "reward": 0.6149553656578064, "reward_std": 0.2039910852909088, "rewards/simpleverify_reward/mean": 0.6149553656578064, "rewards/simpleverify_reward/std": 0.4868776500225067, "step": 1039 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3023.0, "completions/mean_length": 609.484375, "completions/mean_terminated_length": 574.1082153320312, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.09706642400060667, "grad_norm": 0.12823428213596344, "learning_rate": 2e-07, "loss": 0.0285, "num_tokens": 667425212.0, "reward": 0.5770089626312256, "reward_std": 0.19215582311153412, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099319934845, "step": 1040 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2983.0, "completions/mean_length": 580.65625, "completions/mean_terminated_length": 536.9627075195312, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.09715975710060724, "grad_norm": 0.11967428028583527, "learning_rate": 2e-07, "loss": 0.0224, "num_tokens": 668038792.0, "reward": 0.5926339626312256, "reward_std": 0.17182762920856476, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 1041 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2994.0, "completions/mean_length": 703.6563110351562, "completions/mean_terminated_length": 634.109375, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "epoch": 0.09725309020060784, "grad_norm": 0.11447098106145859, "learning_rate": 2e-07, "loss": 0.0381, "num_tokens": 668752828.0, "reward": 0.5546875, "reward_std": 0.21045678853988647, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 1042 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3386.0, "completions/mean_length": 612.1261596679688, "completions/mean_terminated_length": 548.782958984375, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.09734642330060841, "grad_norm": 0.13487251102924347, "learning_rate": 2e-07, "loss": 0.0323, "num_tokens": 669384941.0, "reward": 0.5256696939468384, "reward_std": 0.22507745027542114, "rewards/simpleverify_reward/mean": 0.5256696343421936, "rewards/simpleverify_reward/std": 0.4996195435523987, "step": 1043 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3843.0, "completions/mean_length": 683.1730346679688, "completions/mean_terminated_length": 636.8450317382812, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.09743975640060899, "grad_norm": 0.11303900927305222, "learning_rate": 2e-07, "loss": 0.0196, "num_tokens": 670096904.0, "reward": 0.5412946939468384, "reward_std": 0.1706678569316864, "rewards/simpleverify_reward/mean": 0.5412946343421936, "rewards/simpleverify_reward/std": 0.49857014417648315, "step": 1044 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2382.0, "completions/mean_length": 639.9866333007812, "completions/mean_terminated_length": 589.1052856445312, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.09753308950060958, "grad_norm": 0.13146674633026123, "learning_rate": 2e-07, "loss": 0.0164, "num_tokens": 670770228.0, "reward": 0.4921875298023224, "reward_std": 0.21252721548080444, "rewards/simpleverify_reward/mean": 0.4921875, "rewards/simpleverify_reward/std": 0.5002182126045227, "step": 1045 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2548.0, "completions/mean_length": 592.1674194335938, "completions/mean_terminated_length": 540.5820922851562, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.09762642260061016, "grad_norm": 0.11787738651037216, "learning_rate": 2e-07, "loss": 0.0229, "num_tokens": 671385706.0, "reward": 0.6049107313156128, "reward_std": 0.17269554734230042, "rewards/simpleverify_reward/mean": 0.6049107313156128, "rewards/simpleverify_reward/std": 0.48914292454719543, "step": 1046 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3901.0, "completions/mean_length": 658.0167846679688, "completions/mean_terminated_length": 595.5079345703125, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.09771975570061076, "grad_norm": 0.11706984788179398, "learning_rate": 2e-07, "loss": 0.0186, "num_tokens": 672067577.0, "reward": 0.5636160969734192, "reward_std": 0.18505074083805084, "rewards/simpleverify_reward/mean": 0.5636160969734192, "rewards/simpleverify_reward/std": 0.49621346592903137, "step": 1047 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3162.0, "completions/mean_length": 627.6160888671875, "completions/mean_terminated_length": 572.5623779296875, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.09781308880061133, "grad_norm": 0.12439338862895966, "learning_rate": 2e-07, "loss": 0.0338, "num_tokens": 672727049.0, "reward": 0.590401828289032, "reward_std": 0.19343462586402893, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 1048 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 589.427490234375, "completions/mean_terminated_length": 557.8367309570312, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.09790642190061191, "grad_norm": 0.13321353495121002, "learning_rate": 2e-07, "loss": 0.0414, "num_tokens": 673338056.0, "reward": 0.6160714626312256, "reward_std": 0.2113219052553177, "rewards/simpleverify_reward/mean": 0.6160714030265808, "rewards/simpleverify_reward/std": 0.486612468957901, "step": 1049 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3608.0, "completions/mean_length": 607.7991333007812, "completions/mean_terminated_length": 584.2831420898438, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.0979997550006125, "grad_norm": 0.13128618896007538, "learning_rate": 2e-07, "loss": 0.0259, "num_tokens": 673981972.0, "reward": 0.598214328289032, "reward_std": 0.2223721295595169, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053290486335754, "step": 1050 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3942.0, "completions/mean_length": 662.9107666015625, "completions/mean_terminated_length": 612.366943359375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.09809308810061308, "grad_norm": 0.12008168548345566, "learning_rate": 2e-07, "loss": 0.0286, "num_tokens": 674659316.0, "reward": 0.6082589626312256, "reward_std": 0.17438800632953644, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.4884119927883148, "step": 1051 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2138.0, "completions/mean_length": 599.864990234375, "completions/mean_terminated_length": 540.3394165039062, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.09818642120061366, "grad_norm": 0.13248708844184875, "learning_rate": 2e-07, "loss": 0.0116, "num_tokens": 675285635.0, "reward": 0.5479910969734192, "reward_std": 0.20595136284828186, "rewards/simpleverify_reward/mean": 0.5479910969734192, "rewards/simpleverify_reward/std": 0.49796950817108154, "step": 1052 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3668.0, "completions/mean_length": 608.7533569335938, "completions/mean_terminated_length": 585.2438354492188, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.09827975430061425, "grad_norm": 0.1207674890756607, "learning_rate": 2e-07, "loss": 0.0307, "num_tokens": 675920630.0, "reward": 0.609375, "reward_std": 0.18836717307567596, "rewards/simpleverify_reward/mean": 0.609375, "rewards/simpleverify_reward/std": 0.48816296458244324, "step": 1053 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3350.0, "completions/mean_length": 605.9721069335938, "completions/mean_terminated_length": 566.581298828125, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.09837308740061483, "grad_norm": 0.12426894158124924, "learning_rate": 2e-07, "loss": 0.0274, "num_tokens": 676554917.0, "reward": 0.5959821939468384, "reward_std": 0.17975716292858124, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 1054 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2761.0, "completions/mean_length": 650.614990234375, "completions/mean_terminated_length": 591.9534912109375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.09846642050061541, "grad_norm": 0.11607730388641357, "learning_rate": 2e-07, "loss": 0.0222, "num_tokens": 677234604.0, "reward": 0.5256696939468384, "reward_std": 0.18096107244491577, "rewards/simpleverify_reward/mean": 0.5256696343421936, "rewards/simpleverify_reward/std": 0.4996195435523987, "step": 1055 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2377.0, "completions/mean_length": 634.8069458007812, "completions/mean_terminated_length": 583.849365234375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.098559753600616, "grad_norm": 0.11208048462867737, "learning_rate": 2e-07, "loss": 0.025, "num_tokens": 677895175.0, "reward": 0.5446428656578064, "reward_std": 0.1772405207157135, "rewards/simpleverify_reward/mean": 0.5446428656578064, "rewards/simpleverify_reward/std": 0.4982811510562897, "step": 1056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2482.0, "completions/mean_length": 631.732177734375, "completions/mean_terminated_length": 600.5225219726562, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.09865308670061658, "grad_norm": 0.11880449205636978, "learning_rate": 2e-07, "loss": 0.0363, "num_tokens": 678561895.0, "reward": 0.5446428656578064, "reward_std": 0.21522608399391174, "rewards/simpleverify_reward/mean": 0.5446428656578064, "rewards/simpleverify_reward/std": 0.4982811510562897, "step": 1057 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2450.0, "completions/mean_length": 634.294677734375, "completions/mean_terminated_length": 595.2235107421875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.09874641980061717, "grad_norm": 0.11754146218299866, "learning_rate": 2e-07, "loss": 0.0127, "num_tokens": 679216359.0, "reward": 0.6261160969734192, "reward_std": 0.18096107244491577, "rewards/simpleverify_reward/mean": 0.6261160969734192, "rewards/simpleverify_reward/std": 0.48410359025001526, "step": 1058 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3209.0, "completions/mean_length": 611.200927734375, "completions/mean_terminated_length": 563.8959350585938, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.09883975290061775, "grad_norm": 0.14008189737796783, "learning_rate": 2e-07, "loss": 0.0137, "num_tokens": 679851115.0, "reward": 0.5770089626312256, "reward_std": 0.23101365566253662, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099319934845, "step": 1059 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3723.0, "completions/mean_length": 665.1652221679688, "completions/mean_terminated_length": 598.812255859375, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.09893308600061833, "grad_norm": 0.12862032651901245, "learning_rate": 2e-07, "loss": 0.0516, "num_tokens": 680533415.0, "reward": 0.5691964626312256, "reward_std": 0.20313121378421783, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652488231659, "step": 1060 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4063.0, "completions/mean_length": 575.1942138671875, "completions/mean_terminated_length": 539.4700927734375, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.09902641910061892, "grad_norm": 0.13221673667430878, "learning_rate": 2e-07, "loss": 0.0318, "num_tokens": 681143805.0, "reward": 0.5892857313156128, "reward_std": 0.19813480973243713, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 1061 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2258.0, "completions/mean_length": 579.0982666015625, "completions/mean_terminated_length": 535.3853149414062, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.0991197522006195, "grad_norm": 0.14048689603805542, "learning_rate": 2e-07, "loss": 0.0077, "num_tokens": 681753621.0, "reward": 0.6071428656578064, "reward_std": 0.20482298731803894, "rewards/simpleverify_reward/mean": 0.6071428656578064, "rewards/simpleverify_reward/std": 0.48865827918052673, "step": 1062 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2789.0, "completions/mean_length": 612.0, "completions/mean_terminated_length": 576.6493530273438, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.09921308530062008, "grad_norm": 0.1337037980556488, "learning_rate": 2e-07, "loss": 0.0295, "num_tokens": 682396029.0, "reward": 0.5502232313156128, "reward_std": 0.19430406391620636, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 1063 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3340.0, "completions/mean_length": 599.1116333007812, "completions/mean_terminated_length": 571.5770874023438, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.09930641840062067, "grad_norm": 0.12273416668176651, "learning_rate": 2e-07, "loss": 0.0062, "num_tokens": 683013913.0, "reward": 0.6462053656578064, "reward_std": 0.19543202221393585, "rewards/simpleverify_reward/mean": 0.6462053656578064, "rewards/simpleverify_reward/std": 0.478413462638855, "step": 1064 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3881.0, "completions/mean_length": 680.9944458007812, "completions/mean_terminated_length": 599.0343017578125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.09939975150062125, "grad_norm": 0.11705074459314346, "learning_rate": 2e-07, "loss": 0.028, "num_tokens": 683708044.0, "reward": 0.5491071939468384, "reward_std": 0.19069229066371918, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 1065 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2590.0, "completions/mean_length": 591.0223388671875, "completions/mean_terminated_length": 551.4627685546875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.09949308460062183, "grad_norm": 0.13006873428821564, "learning_rate": 2e-07, "loss": 0.0359, "num_tokens": 684323096.0, "reward": 0.6116071939468384, "reward_std": 0.2085367739200592, "rewards/simpleverify_reward/mean": 0.6116071343421936, "rewards/simpleverify_reward/std": 0.4876568913459778, "step": 1066 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3815.0, "completions/mean_length": 667.2366333007812, "completions/mean_terminated_length": 608.858154296875, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.09958641770062242, "grad_norm": 0.11815237253904343, "learning_rate": 2e-07, "loss": 0.0481, "num_tokens": 685013596.0, "reward": 0.5334821939468384, "reward_std": 0.19794541597366333, "rewards/simpleverify_reward/mean": 0.5334821343421936, "rewards/simpleverify_reward/std": 0.49915632605552673, "step": 1067 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3714.0, "completions/mean_length": 653.364990234375, "completions/mean_terminated_length": 594.7503051757812, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.099679750800623, "grad_norm": 0.12467045336961746, "learning_rate": 2e-07, "loss": 0.0384, "num_tokens": 685692051.0, "reward": 0.5625, "reward_std": 0.17378999292850494, "rewards/simpleverify_reward/mean": 0.5625, "rewards/simpleverify_reward/std": 0.49635544419288635, "step": 1068 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 646.9542846679688, "completions/mean_terminated_length": 576.244873046875, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.09977308390062359, "grad_norm": 0.10390090942382812, "learning_rate": 2e-07, "loss": 0.0358, "num_tokens": 686358906.0, "reward": 0.5502232313156128, "reward_std": 0.14989233016967773, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 1069 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2429.0, "completions/mean_length": 591.7421875, "completions/mean_terminated_length": 572.0774536132812, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.09986641700062417, "grad_norm": 0.13256676495075226, "learning_rate": 2e-07, "loss": 0.0121, "num_tokens": 686991091.0, "reward": 0.5703125, "reward_std": 0.21353361010551453, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 1070 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3319.0, "completions/mean_length": 618.765625, "completions/mean_terminated_length": 563.5714111328125, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.09995975010062474, "grad_norm": 0.12434583157300949, "learning_rate": 2e-07, "loss": 0.0318, "num_tokens": 687633217.0, "reward": 0.6149553656578064, "reward_std": 0.21335068345069885, "rewards/simpleverify_reward/mean": 0.6149553656578064, "rewards/simpleverify_reward/std": 0.4868776500225067, "step": 1071 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3530.0, "completions/mean_length": 653.146240234375, "completions/mean_terminated_length": 586.5608520507812, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.10005308320062534, "grad_norm": 0.1377440243959427, "learning_rate": 2e-07, "loss": 0.0272, "num_tokens": 688302076.0, "reward": 0.5613839626312256, "reward_std": 0.2396903783082962, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 1072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2898.0, "completions/mean_length": 636.5848388671875, "completions/mean_terminated_length": 609.3453369140625, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.10014641630062592, "grad_norm": 0.1263028383255005, "learning_rate": 2e-07, "loss": 0.0192, "num_tokens": 688960216.0, "reward": 0.520089328289032, "reward_std": 0.21707263588905334, "rewards/simpleverify_reward/mean": 0.5200892686843872, "rewards/simpleverify_reward/std": 0.4998753070831299, "step": 1073 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3442.0, "completions/mean_length": 623.1674194335938, "completions/mean_terminated_length": 556.0022583007812, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.1002397494006265, "grad_norm": 0.12185796350240707, "learning_rate": 2e-07, "loss": 0.0149, "num_tokens": 689614782.0, "reward": 0.5636160969734192, "reward_std": 0.18599432706832886, "rewards/simpleverify_reward/mean": 0.5636160969734192, "rewards/simpleverify_reward/std": 0.49621346592903137, "step": 1074 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3609.0, "completions/mean_length": 687.8047485351562, "completions/mean_terminated_length": 621.8896484375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.10033308250062709, "grad_norm": 0.12157906591892242, "learning_rate": 2e-07, "loss": 0.0288, "num_tokens": 690323799.0, "reward": 0.6082589626312256, "reward_std": 0.18960711359977722, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.4884119927883148, "step": 1075 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3104.0, "completions/mean_length": 538.09375, "completions/mean_terminated_length": 514.1078491210938, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.10042641560062766, "grad_norm": 0.13731496036052704, "learning_rate": 2e-07, "loss": 0.0302, "num_tokens": 690889755.0, "reward": 0.6774553656578064, "reward_std": 0.18186262249946594, "rewards/simpleverify_reward/mean": 0.6774553656578064, "rewards/simpleverify_reward/std": 0.4677111804485321, "step": 1076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2351.0, "completions/mean_length": 631.8761596679688, "completions/mean_terminated_length": 576.8900146484375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.10051974870062824, "grad_norm": 0.13632279634475708, "learning_rate": 2e-07, "loss": 0.0244, "num_tokens": 691550324.0, "reward": 0.5736607313156128, "reward_std": 0.23232382535934448, "rewards/simpleverify_reward/mean": 0.5736607313156128, "rewards/simpleverify_reward/std": 0.4948205351829529, "step": 1077 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3771.0, "completions/mean_length": 669.8828125, "completions/mean_terminated_length": 591.6609497070312, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.10061308180062883, "grad_norm": 0.11745911836624146, "learning_rate": 2e-07, "loss": 0.0207, "num_tokens": 692243011.0, "reward": 0.559151828289032, "reward_std": 0.1634196639060974, "rewards/simpleverify_reward/mean": 0.5591517686843872, "rewards/simpleverify_reward/std": 0.496766060590744, "step": 1078 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3840.0, "completions/mean_length": 612.7734375, "completions/mean_terminated_length": 573.4594116210938, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 0.10070641490062941, "grad_norm": 0.12628400325775146, "learning_rate": 2e-07, "loss": 0.0328, "num_tokens": 692885512.0, "reward": 0.640625, "reward_std": 0.18332615494728088, "rewards/simpleverify_reward/mean": 0.640625, "rewards/simpleverify_reward/std": 0.48008525371551514, "step": 1079 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3838.0, "completions/mean_length": 691.505615234375, "completions/mean_terminated_length": 597.8038940429688, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.10079974800063, "grad_norm": 0.10536954551935196, "learning_rate": 2e-07, "loss": 0.0208, "num_tokens": 693594861.0, "reward": 0.5256696939468384, "reward_std": 0.16435188055038452, "rewards/simpleverify_reward/mean": 0.5256696343421936, "rewards/simpleverify_reward/std": 0.4996195435523987, "step": 1080 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2856.0, "completions/mean_length": 649.700927734375, "completions/mean_terminated_length": 606.8655395507812, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.10089308110063058, "grad_norm": 0.12285309284925461, "learning_rate": 2e-07, "loss": 0.0109, "num_tokens": 694272345.0, "reward": 0.5502232313156128, "reward_std": 0.1896056979894638, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 1081 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2245.0, "completions/mean_length": 561.8973388671875, "completions/mean_terminated_length": 542.0651245117188, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.10098641420063116, "grad_norm": 0.13235244154930115, "learning_rate": 2e-07, "loss": 0.0317, "num_tokens": 694871533.0, "reward": 0.6462053656578064, "reward_std": 0.1907682567834854, "rewards/simpleverify_reward/mean": 0.6462053656578064, "rewards/simpleverify_reward/std": 0.478413462638855, "step": 1082 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 614.0379638671875, "completions/mean_terminated_length": 574.7381591796875, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.10107974730063175, "grad_norm": 0.11114060133695602, "learning_rate": 2e-07, "loss": 0.0315, "num_tokens": 695520895.0, "reward": 0.6227678656578064, "reward_std": 0.16104546189308167, "rewards/simpleverify_reward/mean": 0.6227678656578064, "rewards/simpleverify_reward/std": 0.4849644899368286, "step": 1083 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3767.0, "completions/mean_length": 654.513427734375, "completions/mean_terminated_length": 591.94091796875, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 0.10117308040063233, "grad_norm": 0.10779467970132828, "learning_rate": 2e-07, "loss": 0.022, "num_tokens": 696194563.0, "reward": 0.5636160969734192, "reward_std": 0.1871953308582306, "rewards/simpleverify_reward/mean": 0.5636160969734192, "rewards/simpleverify_reward/std": 0.49621346592903137, "step": 1084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3104.0, "completions/mean_length": 576.5513916015625, "completions/mean_terminated_length": 528.7760620117188, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.10126641350063291, "grad_norm": 0.14008013904094696, "learning_rate": 2e-07, "loss": 0.0314, "num_tokens": 696805633.0, "reward": 0.6473214626312256, "reward_std": 0.18570080399513245, "rewards/simpleverify_reward/mean": 0.6473214030265808, "rewards/simpleverify_reward/std": 0.47807061672210693, "step": 1085 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3885.0, "completions/mean_length": 642.1350708007812, "completions/mean_terminated_length": 591.285400390625, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.1013597466006335, "grad_norm": 0.11414835602045059, "learning_rate": 2e-07, "loss": 0.0159, "num_tokens": 697479674.0, "reward": 0.551339328289032, "reward_std": 0.16633719205856323, "rewards/simpleverify_reward/mean": 0.5513392686843872, "rewards/simpleverify_reward/std": 0.4976350665092468, "step": 1086 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4015.0, "completions/mean_length": 643.6138916015625, "completions/mean_terminated_length": 572.8359985351562, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.10145307970063408, "grad_norm": 0.12822304666042328, "learning_rate": 2e-07, "loss": 0.0226, "num_tokens": 698135848.0, "reward": 0.598214328289032, "reward_std": 0.18698212504386902, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053287506103516, "step": 1087 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4053.0, "completions/mean_length": 605.7935791015625, "completions/mean_terminated_length": 566.4006958007812, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.10154641280063466, "grad_norm": 0.11935700476169586, "learning_rate": 2e-07, "loss": 0.0211, "num_tokens": 698776351.0, "reward": 0.5491071939468384, "reward_std": 0.179875910282135, "rewards/simpleverify_reward/mean": 0.5491071343421936, "rewards/simpleverify_reward/std": 0.49786055088043213, "step": 1088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3538.0, "completions/mean_length": 630.6138916015625, "completions/mean_terminated_length": 591.5011596679688, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.10163974590063525, "grad_norm": 0.11508334428071976, "learning_rate": 2e-07, "loss": 0.0411, "num_tokens": 699433853.0, "reward": 0.5479910969734192, "reward_std": 0.19858691096305847, "rewards/simpleverify_reward/mean": 0.5479910969734192, "rewards/simpleverify_reward/std": 0.49796947836875916, "step": 1089 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 614.5870971679688, "completions/mean_terminated_length": 563.331787109375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.10173307900063583, "grad_norm": 0.1334446370601654, "learning_rate": 2e-07, "loss": 0.0368, "num_tokens": 700077515.0, "reward": 0.5736607313156128, "reward_std": 0.1842365711927414, "rewards/simpleverify_reward/mean": 0.5736607313156128, "rewards/simpleverify_reward/std": 0.4948205351829529, "step": 1090 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2379.0, "completions/mean_length": 627.2131958007812, "completions/mean_terminated_length": 584.0983276367188, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.10182641210063642, "grad_norm": 0.11898814886808395, "learning_rate": 2e-07, "loss": 0.0192, "num_tokens": 700735466.0, "reward": 0.6116071939468384, "reward_std": 0.19982405006885529, "rewards/simpleverify_reward/mean": 0.6116071343421936, "rewards/simpleverify_reward/std": 0.48765692114830017, "step": 1091 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4013.0, "completions/mean_length": 585.1027221679688, "completions/mean_terminated_length": 537.4434814453125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.101919745200637, "grad_norm": 0.1286686807870865, "learning_rate": 2e-07, "loss": 0.0191, "num_tokens": 701349110.0, "reward": 0.6305803656578064, "reward_std": 0.1661524474620819, "rewards/simpleverify_reward/mean": 0.6305803656578064, "rewards/simpleverify_reward/std": 0.4829172194004059, "step": 1092 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3664.0, "completions/mean_length": 625.0123291015625, "completions/mean_terminated_length": 597.6817016601562, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.10201307830063758, "grad_norm": 0.12326326966285706, "learning_rate": 2e-07, "loss": 0.0311, "num_tokens": 701991777.0, "reward": 0.5691964626312256, "reward_std": 0.20377060770988464, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652786254883, "step": 1093 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3547.0, "completions/mean_length": 653.6004638671875, "completions/mean_terminated_length": 591.0113525390625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.10210641140063817, "grad_norm": 0.13054893910884857, "learning_rate": 2e-07, "loss": 0.0231, "num_tokens": 702662187.0, "reward": 0.5424107313156128, "reward_std": 0.2310193032026291, "rewards/simpleverify_reward/mean": 0.5424107313156128, "rewards/simpleverify_reward/std": 0.4984763562679291, "step": 1094 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3494.0, "completions/mean_length": 659.8426513671875, "completions/mean_terminated_length": 617.1333618164062, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.10219974450063875, "grad_norm": 0.11638720333576202, "learning_rate": 2e-07, "loss": 0.0306, "num_tokens": 703338430.0, "reward": 0.5658482313156128, "reward_std": 0.1846388727426529, "rewards/simpleverify_reward/mean": 0.5658482313156128, "rewards/simpleverify_reward/std": 0.49592188000679016, "step": 1095 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1836.0, "completions/mean_length": 582.1138916015625, "completions/mean_terminated_length": 550.4572143554688, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.10229307760063933, "grad_norm": 0.1402270793914795, "learning_rate": 2e-07, "loss": 0.0498, "num_tokens": 703948884.0, "reward": 0.5970982313156128, "reward_std": 0.22833971679210663, "rewards/simpleverify_reward/mean": 0.5970982313156128, "rewards/simpleverify_reward/std": 0.49075525999069214, "step": 1096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 644.9486694335938, "completions/mean_terminated_length": 629.47314453125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.10238641070063992, "grad_norm": 0.12496225535869598, "learning_rate": 2e-07, "loss": 0.018, "num_tokens": 704611846.0, "reward": 0.5535714626312256, "reward_std": 0.22033743560314178, "rewards/simpleverify_reward/mean": 0.5535714030265808, "rewards/simpleverify_reward/std": 0.4973995089530945, "step": 1097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3198.0, "completions/mean_length": 597.013427734375, "completions/mean_terminated_length": 553.523193359375, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.1024797438006405, "grad_norm": 0.12066938728094101, "learning_rate": 2e-07, "loss": 0.0078, "num_tokens": 705236922.0, "reward": 0.6194196939468384, "reward_std": 0.1652188003063202, "rewards/simpleverify_reward/mean": 0.6194196343421936, "rewards/simpleverify_reward/std": 0.48580074310302734, "step": 1098 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2754.0, "completions/mean_length": 581.5848388671875, "completions/mean_terminated_length": 545.925537109375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.10257307690064109, "grad_norm": 0.12148687243461609, "learning_rate": 2e-07, "loss": 0.0467, "num_tokens": 705841702.0, "reward": 0.6930803656578064, "reward_std": 0.17318198084831238, "rewards/simpleverify_reward/mean": 0.6930803656578064, "rewards/simpleverify_reward/std": 0.46147337555885315, "step": 1099 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3675.0, "completions/mean_length": 636.8671875, "completions/mean_terminated_length": 605.703857421875, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.10266641000064167, "grad_norm": 0.12852159142494202, "learning_rate": 2e-07, "loss": 0.0264, "num_tokens": 706515839.0, "reward": 0.5502232313156128, "reward_std": 0.2069309651851654, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 1100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3942.0, "completions/mean_length": 663.568115234375, "completions/mean_terminated_length": 628.7406616210938, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.10275974310064225, "grad_norm": 0.11225276440382004, "learning_rate": 2e-07, "loss": 0.0189, "num_tokens": 707196636.0, "reward": 0.5379464626312256, "reward_std": 0.17010192573070526, "rewards/simpleverify_reward/mean": 0.5379464030265808, "rewards/simpleverify_reward/std": 0.4988364577293396, "step": 1101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2367.0, "completions/mean_length": 581.9877319335938, "completions/mean_terminated_length": 554.318359375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.10285307620064284, "grad_norm": 0.11342346668243408, "learning_rate": 2e-07, "loss": 0.0038, "num_tokens": 707798969.0, "reward": 0.6082589626312256, "reward_std": 0.1670539826154709, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.4884119927883148, "step": 1102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3925.0, "completions/mean_length": 617.255615234375, "completions/mean_terminated_length": 593.8034057617188, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 0.10294640930064342, "grad_norm": 0.11916676163673401, "learning_rate": 2e-07, "loss": -0.004, "num_tokens": 708453862.0, "reward": 0.609375, "reward_std": 0.16201861202716827, "rewards/simpleverify_reward/mean": 0.609375, "rewards/simpleverify_reward/std": 0.48816296458244324, "step": 1103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3438.0, "completions/mean_length": 655.9788208007812, "completions/mean_terminated_length": 585.4544677734375, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.103039742400644, "grad_norm": 0.11711705476045609, "learning_rate": 2e-07, "loss": 0.0383, "num_tokens": 709126995.0, "reward": 0.5569196939468384, "reward_std": 0.18103525042533875, "rewards/simpleverify_reward/mean": 0.5569196343421936, "rewards/simpleverify_reward/std": 0.49702703952789307, "step": 1104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3136.0, "completions/mean_length": 634.4710083007812, "completions/mean_terminated_length": 563.5057373046875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.10313307550064459, "grad_norm": 0.11611688882112503, "learning_rate": 2e-07, "loss": 0.0364, "num_tokens": 709781305.0, "reward": 0.5758928656578064, "reward_std": 0.162215456366539, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49448272585868835, "step": 1105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2068.0, "completions/mean_length": 556.388427734375, "completions/mean_terminated_length": 504.27630615234375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.10322640860064516, "grad_norm": 0.13344806432724, "learning_rate": 2e-07, "loss": 0.0089, "num_tokens": 710378733.0, "reward": 0.5993303656578064, "reward_std": 0.196027934551239, "rewards/simpleverify_reward/mean": 0.5993303656578064, "rewards/simpleverify_reward/std": 0.49030786752700806, "step": 1106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2960.0, "completions/mean_length": 644.810302734375, "completions/mean_terminated_length": 586.0499877929688, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.10331974170064574, "grad_norm": 0.12176943570375443, "learning_rate": 2e-07, "loss": 0.0426, "num_tokens": 711037923.0, "reward": 0.6104910969734192, "reward_std": 0.18881294131278992, "rewards/simpleverify_reward/mean": 0.6104910969734192, "rewards/simpleverify_reward/std": 0.48791128396987915, "step": 1107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3206.0, "completions/mean_length": 630.247802734375, "completions/mean_terminated_length": 587.170654296875, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.10341307480064633, "grad_norm": 0.12616625428199768, "learning_rate": 2e-07, "loss": 0.0153, "num_tokens": 711696529.0, "reward": 0.6127232313156128, "reward_std": 0.1985880434513092, "rewards/simpleverify_reward/mean": 0.6127232313156128, "rewards/simpleverify_reward/std": 0.4873998463153839, "step": 1108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 1850.0, "completions/mean_length": 655.8046875, "completions/mean_terminated_length": 589.270751953125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.10350640790064691, "grad_norm": 0.1265367865562439, "learning_rate": 2e-07, "loss": 0.021, "num_tokens": 712374914.0, "reward": 0.5948660969734192, "reward_std": 0.21289560198783875, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.49119213223457336, "step": 1109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4042.0, "completions/mean_length": 633.1763916015625, "completions/mean_terminated_length": 586.1697387695312, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.1035997410006475, "grad_norm": 0.1170983836054802, "learning_rate": 2e-07, "loss": 0.0158, "num_tokens": 713033368.0, "reward": 0.5267857313156128, "reward_std": 0.18889032304286957, "rewards/simpleverify_reward/mean": 0.5267857313156128, "rewards/simpleverify_reward/std": 0.4995608627796173, "step": 1110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3941.0, "completions/mean_length": 613.0256958007812, "completions/mean_terminated_length": 561.7474365234375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.10369307410064808, "grad_norm": 0.1250627040863037, "learning_rate": 2e-07, "loss": 0.0425, "num_tokens": 713669567.0, "reward": 0.5736607313156128, "reward_std": 0.2101161628961563, "rewards/simpleverify_reward/mean": 0.5736607313156128, "rewards/simpleverify_reward/std": 0.4948205351829529, "step": 1111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3323.0, "completions/mean_length": 591.7455444335938, "completions/mean_terminated_length": 515.82666015625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.10378640720064866, "grad_norm": 0.11778108030557632, "learning_rate": 2e-07, "loss": 0.0251, "num_tokens": 714277107.0, "reward": 0.6741071939468384, "reward_std": 0.1648397445678711, "rewards/simpleverify_reward/mean": 0.6741071343421936, "rewards/simpleverify_reward/std": 0.4689692258834839, "step": 1112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2327.0, "completions/mean_length": 609.6830444335938, "completions/mean_terminated_length": 570.3341064453125, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.10387974030064925, "grad_norm": 0.11568685621023178, "learning_rate": 2e-07, "loss": 0.0318, "num_tokens": 714901615.0, "reward": 0.621651828289032, "reward_std": 0.1653621941804886, "rewards/simpleverify_reward/mean": 0.6216517686843872, "rewards/simpleverify_reward/std": 0.4852459728717804, "step": 1113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3355.0, "completions/mean_length": 643.8192138671875, "completions/mean_terminated_length": 560.9668579101562, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.10397307340064983, "grad_norm": 0.1432749330997467, "learning_rate": 2e-07, "loss": 0.0255, "num_tokens": 715560797.0, "reward": 0.5457589626312256, "reward_std": 0.19073615968227386, "rewards/simpleverify_reward/mean": 0.5457589030265808, "rewards/simpleverify_reward/std": 0.4981798231601715, "step": 1114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0022321428571429047, "completions/max_length": 4096.0, "completions/max_terminated_length": 2358.0, "completions/mean_length": 580.3248291015625, "completions/mean_terminated_length": 572.459716796875, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.10406640650065041, "grad_norm": 0.13089516758918762, "learning_rate": 2e-07, "loss": 0.0193, "num_tokens": 716167280.0, "reward": 0.65625, "reward_std": 0.1694304496049881, "rewards/simpleverify_reward/mean": 0.65625, "rewards/simpleverify_reward/std": 0.4752241373062134, "step": 1115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3995.0, "completions/mean_length": 696.3359985351562, "completions/mean_terminated_length": 638.4529418945312, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.104159739600651, "grad_norm": 0.10389790683984756, "learning_rate": 2e-07, "loss": 0.0175, "num_tokens": 716880277.0, "reward": 0.5379464626312256, "reward_std": 0.18077494204044342, "rewards/simpleverify_reward/mean": 0.5379464030265808, "rewards/simpleverify_reward/std": 0.4988364279270172, "step": 1116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 624.443115234375, "completions/mean_terminated_length": 589.2186889648438, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.10425307270065158, "grad_norm": 0.12513814866542816, "learning_rate": 2e-07, "loss": 0.0317, "num_tokens": 717532970.0, "reward": 0.5390625, "reward_std": 0.19415070116519928, "rewards/simpleverify_reward/mean": 0.5390625, "rewards/simpleverify_reward/std": 0.4987502098083496, "step": 1117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2910.0, "completions/mean_length": 642.9140625, "completions/mean_terminated_length": 596.0396118164062, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.10434640580065216, "grad_norm": 0.12829791009426117, "learning_rate": 2e-07, "loss": 0.0197, "num_tokens": 718198517.0, "reward": 0.5837053656578064, "reward_std": 0.20467285811901093, "rewards/simpleverify_reward/mean": 0.5837053656578064, "rewards/simpleverify_reward/std": 0.49321895837783813, "step": 1118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3711.0, "completions/mean_length": 624.1517944335938, "completions/mean_terminated_length": 577.0226440429688, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.10443973890065275, "grad_norm": 0.11354951560497284, "learning_rate": 2e-07, "loss": 0.0292, "num_tokens": 718837701.0, "reward": 0.6316964626312256, "reward_std": 0.17243199050426483, "rewards/simpleverify_reward/mean": 0.6316964030265808, "rewards/simpleverify_reward/std": 0.4826137125492096, "step": 1119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 1726.0, "completions/mean_length": 607.3092041015625, "completions/mean_terminated_length": 559.9513549804688, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.10453307200065333, "grad_norm": 0.12719784677028656, "learning_rate": 2e-07, "loss": 0.0221, "num_tokens": 719471762.0, "reward": 0.6305803656578064, "reward_std": 0.18148283660411835, "rewards/simpleverify_reward/mean": 0.6305803656578064, "rewards/simpleverify_reward/std": 0.4829172194004059, "step": 1120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2941.0, "completions/mean_length": 599.7600708007812, "completions/mean_terminated_length": 556.303955078125, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.10462640510065392, "grad_norm": 0.11273051053285599, "learning_rate": 2e-07, "loss": 0.0164, "num_tokens": 720094651.0, "reward": 0.6238839626312256, "reward_std": 0.17580877244472504, "rewards/simpleverify_reward/mean": 0.6238839030265808, "rewards/simpleverify_reward/std": 0.4846802353858948, "step": 1121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 582.8549194335938, "completions/mean_terminated_length": 551.2049560546875, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.1047197382006545, "grad_norm": 0.13642646372318268, "learning_rate": 2e-07, "loss": 0.0047, "num_tokens": 720710561.0, "reward": 0.6171875, "reward_std": 0.2100858837366104, "rewards/simpleverify_reward/mean": 0.6171875, "rewards/simpleverify_reward/std": 0.4863446056842804, "step": 1122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3724.0, "completions/mean_length": 612.2522583007812, "completions/mean_terminated_length": 576.9041137695312, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.10481307130065508, "grad_norm": 0.11195480823516846, "learning_rate": 2e-07, "loss": 0.0137, "num_tokens": 721355515.0, "reward": 0.5580357313156128, "reward_std": 0.15015378594398499, "rewards/simpleverify_reward/mean": 0.5580357313156128, "rewards/simpleverify_reward/std": 0.49689781665802, "step": 1123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3778.0, "completions/mean_length": 604.5725708007812, "completions/mean_terminated_length": 565.1659545898438, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.10490640440065567, "grad_norm": 0.11008071154356003, "learning_rate": 2e-07, "loss": 0.012, "num_tokens": 721980924.0, "reward": 0.6272321939468384, "reward_std": 0.15544694662094116, "rewards/simpleverify_reward/mean": 0.6272321343421936, "rewards/simpleverify_reward/std": 0.4838111400604248, "step": 1124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3606.0, "completions/mean_length": 648.2589721679688, "completions/mean_terminated_length": 617.1982421875, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.10499973750065625, "grad_norm": 0.11244987696409225, "learning_rate": 2e-07, "loss": 0.0192, "num_tokens": 722656796.0, "reward": 0.5993303656578064, "reward_std": 0.1874663382768631, "rewards/simpleverify_reward/mean": 0.5993303656578064, "rewards/simpleverify_reward/std": 0.49030786752700806, "step": 1125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3018.0, "completions/mean_length": 642.9453125, "completions/mean_terminated_length": 607.9086303710938, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.10509307060065683, "grad_norm": 0.13063888251781464, "learning_rate": 2e-07, "loss": 0.0249, "num_tokens": 723323339.0, "reward": 0.5848214626312256, "reward_std": 0.22079278528690338, "rewards/simpleverify_reward/mean": 0.5848214030265808, "rewards/simpleverify_reward/std": 0.49302801489830017, "step": 1126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3919.0, "completions/mean_length": 617.1395263671875, "completions/mean_terminated_length": 561.9194946289062, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.10518640370065742, "grad_norm": 0.12170901149511337, "learning_rate": 2e-07, "loss": 0.0233, "num_tokens": 723965552.0, "reward": 0.6037946939468384, "reward_std": 0.1738227903842926, "rewards/simpleverify_reward/mean": 0.6037946343421936, "rewards/simpleverify_reward/std": 0.48938122391700745, "step": 1127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3019.0, "completions/mean_length": 619.5324096679688, "completions/mean_terminated_length": 560.3416748046875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.105279736800658, "grad_norm": 0.12402714043855667, "learning_rate": 2e-07, "loss": 0.0301, "num_tokens": 724604981.0, "reward": 0.6640625, "reward_std": 0.17254258692264557, "rewards/simpleverify_reward/mean": 0.6640625, "rewards/simpleverify_reward/std": 0.4725809693336487, "step": 1128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2875.0, "completions/mean_length": 572.8147583007812, "completions/mean_terminated_length": 533.0496826171875, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.10537306990065858, "grad_norm": 0.13337017595767975, "learning_rate": 2e-07, "loss": 0.0222, "num_tokens": 725199559.0, "reward": 0.6339285969734192, "reward_std": 0.1609034538269043, "rewards/simpleverify_reward/mean": 0.6339285969734192, "rewards/simpleverify_reward/std": 0.48199838399887085, "step": 1129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3465.0, "completions/mean_length": 611.1127319335938, "completions/mean_terminated_length": 567.7977294921875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.10546640300065917, "grad_norm": 0.12480348348617554, "learning_rate": 2e-07, "loss": 0.0173, "num_tokens": 725834628.0, "reward": 0.5915178656578064, "reward_std": 0.1731148660182953, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 1130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3253.0, "completions/mean_length": 651.4699096679688, "completions/mean_terminated_length": 612.5925903320312, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.10555973610065975, "grad_norm": 0.1269901990890503, "learning_rate": 2e-07, "loss": 0.0296, "num_tokens": 726501385.0, "reward": 0.5167410969734192, "reward_std": 0.20076724886894226, "rewards/simpleverify_reward/mean": 0.5167410969734192, "rewards/simpleverify_reward/std": 0.4999987483024597, "step": 1131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3551.0, "completions/mean_length": 612.8270263671875, "completions/mean_terminated_length": 561.5458374023438, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.10565306920066034, "grad_norm": 0.12764500081539154, "learning_rate": 2e-07, "loss": 0.0324, "num_tokens": 727148094.0, "reward": 0.621651828289032, "reward_std": 0.1738559752702713, "rewards/simpleverify_reward/mean": 0.6216517686843872, "rewards/simpleverify_reward/std": 0.4852459728717804, "step": 1132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3587.0, "completions/mean_length": 624.1730346679688, "completions/mean_terminated_length": 581.0203247070312, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.10574640230066092, "grad_norm": 0.11654733866453171, "learning_rate": 2e-07, "loss": 0.0153, "num_tokens": 727806241.0, "reward": 0.5703125, "reward_std": 0.19384507834911346, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 1133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4036.0, "completions/mean_length": 674.7221069335938, "completions/mean_terminated_length": 604.58203125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.1058397354006615, "grad_norm": 0.13671660423278809, "learning_rate": 2e-07, "loss": 0.0352, "num_tokens": 728498024.0, "reward": 0.5859375, "reward_std": 0.18964172899723053, "rewards/simpleverify_reward/mean": 0.5859375, "rewards/simpleverify_reward/std": 0.4928344786167145, "step": 1134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4023.0, "completions/mean_length": 629.671875, "completions/mean_terminated_length": 578.6387329101562, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.10593306850066209, "grad_norm": 0.1322758048772812, "learning_rate": 2e-07, "loss": 0.0202, "num_tokens": 729150514.0, "reward": 0.5558035969734192, "reward_std": 0.22331053018569946, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715372920036316, "step": 1135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4037.0, "completions/mean_length": 672.84375, "completions/mean_terminated_length": 606.6393432617188, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.10602640160066266, "grad_norm": 0.12478887289762497, "learning_rate": 2e-07, "loss": 0.0232, "num_tokens": 729850766.0, "reward": 0.6127232313156128, "reward_std": 0.19347669184207916, "rewards/simpleverify_reward/mean": 0.6127232313156128, "rewards/simpleverify_reward/std": 0.4873998463153839, "step": 1136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3963.0, "completions/mean_length": 628.8694458007812, "completions/mean_terminated_length": 585.775146484375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.10611973470066324, "grad_norm": 0.11405444890260696, "learning_rate": 2e-07, "loss": 0.0183, "num_tokens": 730502929.0, "reward": 0.5993303656578064, "reward_std": 0.15875452756881714, "rewards/simpleverify_reward/mean": 0.5993303656578064, "rewards/simpleverify_reward/std": 0.49030786752700806, "step": 1137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2372.0, "completions/mean_length": 627.5089721679688, "completions/mean_terminated_length": 592.3156127929688, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.10621306780066384, "grad_norm": 0.14537815749645233, "learning_rate": 2e-07, "loss": 0.0252, "num_tokens": 731156801.0, "reward": 0.5625, "reward_std": 0.22041161358356476, "rewards/simpleverify_reward/mean": 0.5625, "rewards/simpleverify_reward/std": 0.49635544419288635, "step": 1138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2825.0, "completions/mean_length": 619.234375, "completions/mean_terminated_length": 579.9932250976562, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.10630640090066441, "grad_norm": 0.1240154355764389, "learning_rate": 2e-07, "loss": 0.0362, "num_tokens": 731807403.0, "reward": 0.6104910969734192, "reward_std": 0.2039974480867386, "rewards/simpleverify_reward/mean": 0.6104910969734192, "rewards/simpleverify_reward/std": 0.48791125416755676, "step": 1139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3060.0, "completions/mean_length": 624.3136596679688, "completions/mean_terminated_length": 569.20751953125, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.10639973400066499, "grad_norm": 0.13257543742656708, "learning_rate": 2e-07, "loss": 0.0383, "num_tokens": 732446692.0, "reward": 0.609375, "reward_std": 0.2207135707139969, "rewards/simpleverify_reward/mean": 0.609375, "rewards/simpleverify_reward/std": 0.48816296458244324, "step": 1140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2992.0, "completions/mean_length": 650.9754638671875, "completions/mean_terminated_length": 580.3485717773438, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.10649306710066558, "grad_norm": 0.13041844964027405, "learning_rate": 2e-07, "loss": 0.0161, "num_tokens": 733126494.0, "reward": 0.5334821939468384, "reward_std": 0.20940229296684265, "rewards/simpleverify_reward/mean": 0.5334821343421936, "rewards/simpleverify_reward/std": 0.49915629625320435, "step": 1141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3214.0, "completions/mean_length": 579.4263916015625, "completions/mean_terminated_length": 547.7454833984375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.10658640020066616, "grad_norm": 0.12233099341392517, "learning_rate": 2e-07, "loss": 0.0256, "num_tokens": 733725868.0, "reward": 0.6707589626312256, "reward_std": 0.18170924484729767, "rewards/simpleverify_reward/mean": 0.6707589030265808, "rewards/simpleverify_reward/std": 0.4702001214027405, "step": 1142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2072.0, "completions/mean_length": 649.0267944335938, "completions/mean_terminated_length": 574.348876953125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.10667973330066675, "grad_norm": 0.1313389092683792, "learning_rate": 2e-07, "loss": 0.0369, "num_tokens": 734402916.0, "reward": 0.5770089626312256, "reward_std": 0.23799718916416168, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099319934845, "step": 1143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3344.0, "completions/mean_length": 634.3426513671875, "completions/mean_terminated_length": 591.31640625, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.10677306640066733, "grad_norm": 0.12336692214012146, "learning_rate": 2e-07, "loss": 0.0064, "num_tokens": 735069007.0, "reward": 0.5625, "reward_std": 0.1828376203775406, "rewards/simpleverify_reward/mean": 0.5625, "rewards/simpleverify_reward/std": 0.49635544419288635, "step": 1144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3199.0, "completions/mean_length": 670.6964721679688, "completions/mean_terminated_length": 592.4931030273438, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.10686639950066791, "grad_norm": 0.10794051736593246, "learning_rate": 2e-07, "loss": 0.0418, "num_tokens": 735749047.0, "reward": 0.559151828289032, "reward_std": 0.1536022275686264, "rewards/simpleverify_reward/mean": 0.5591517686843872, "rewards/simpleverify_reward/std": 0.496766060590744, "step": 1145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3286.0, "completions/mean_length": 605.5848388671875, "completions/mean_terminated_length": 562.201171875, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.1069597326006685, "grad_norm": 0.1257300078868866, "learning_rate": 2e-07, "loss": 0.0544, "num_tokens": 736384611.0, "reward": 0.6004464626312256, "reward_std": 0.18960639834403992, "rewards/simpleverify_reward/mean": 0.6004464030265808, "rewards/simpleverify_reward/std": 0.49008017778396606, "step": 1146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3837.0, "completions/mean_length": 601.2835083007812, "completions/mean_terminated_length": 569.799560546875, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.10705306570066908, "grad_norm": 0.11136110126972198, "learning_rate": 2e-07, "loss": 0.0176, "num_tokens": 737019545.0, "reward": 0.5691964626312256, "reward_std": 0.1550350785255432, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652488231659, "step": 1147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2835.0, "completions/mean_length": 642.8002319335938, "completions/mean_terminated_length": 587.987548828125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.10714639880066966, "grad_norm": 0.13565942645072937, "learning_rate": 2e-07, "loss": 0.0398, "num_tokens": 737674918.0, "reward": 0.5892857313156128, "reward_std": 0.23341716825962067, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 1148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3931.0, "completions/mean_length": 608.2567138671875, "completions/mean_terminated_length": 560.9118041992188, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.10723973190067025, "grad_norm": 0.12107432633638382, "learning_rate": 2e-07, "loss": 0.022, "num_tokens": 738318668.0, "reward": 0.6037946939468384, "reward_std": 0.1696154773235321, "rewards/simpleverify_reward/mean": 0.6037946343421936, "rewards/simpleverify_reward/std": 0.48938122391700745, "step": 1149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3872.0, "completions/mean_length": 601.7455444335938, "completions/mean_terminated_length": 554.312255859375, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.10733306500067083, "grad_norm": 0.1175415962934494, "learning_rate": 2e-07, "loss": 0.0283, "num_tokens": 738953376.0, "reward": 0.640625, "reward_std": 0.16401740908622742, "rewards/simpleverify_reward/mean": 0.640625, "rewards/simpleverify_reward/std": 0.48008525371551514, "step": 1150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4037.0, "completions/mean_length": 655.0033569335938, "completions/mean_terminated_length": 588.4539184570312, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.10742639810067142, "grad_norm": 0.12616229057312012, "learning_rate": 2e-07, "loss": 0.0272, "num_tokens": 739641867.0, "reward": 0.5, "reward_std": 0.20264378190040588, "rewards/simpleverify_reward/mean": 0.5, "rewards/simpleverify_reward/std": 0.5002792477607727, "step": 1151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3238.0, "completions/mean_length": 700.8828735351562, "completions/mean_terminated_length": 627.328369140625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.107519731200672, "grad_norm": 0.11618806421756744, "learning_rate": 2e-07, "loss": 0.0221, "num_tokens": 740362610.0, "reward": 0.5502232313156128, "reward_std": 0.19866107404232025, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 1152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2216.0, "completions/mean_length": 622.9140625, "completions/mean_terminated_length": 551.7118530273438, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.10761306430067258, "grad_norm": 0.12501151859760284, "learning_rate": 2e-07, "loss": 0.0132, "num_tokens": 741013309.0, "reward": 0.5803571939468384, "reward_std": 0.1755894124507904, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761127948761, "step": 1153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2793.0, "completions/mean_length": 645.8326416015625, "completions/mean_terminated_length": 587.0897216796875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.10770639740067317, "grad_norm": 0.10696237534284592, "learning_rate": 2e-07, "loss": 0.0168, "num_tokens": 741677207.0, "reward": 0.5502232313156128, "reward_std": 0.15706273913383484, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 1154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3943.0, "completions/mean_length": 643.3136596679688, "completions/mean_terminated_length": 588.5090942382812, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.10779973050067375, "grad_norm": 0.12219742685556412, "learning_rate": 2e-07, "loss": 0.0291, "num_tokens": 742349168.0, "reward": 0.6071428656578064, "reward_std": 0.1766085922718048, "rewards/simpleverify_reward/mean": 0.6071428656578064, "rewards/simpleverify_reward/std": 0.48865827918052673, "step": 1155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3853.0, "completions/mean_length": 647.3705444335938, "completions/mean_terminated_length": 600.5565795898438, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.10789306360067433, "grad_norm": 0.10916987806558609, "learning_rate": 2e-07, "loss": 0.011, "num_tokens": 743021196.0, "reward": 0.5569196939468384, "reward_std": 0.17363843321800232, "rewards/simpleverify_reward/mean": 0.5569196343421936, "rewards/simpleverify_reward/std": 0.4970270097255707, "step": 1156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3709.0, "completions/mean_length": 621.5926513671875, "completions/mean_terminated_length": 570.4405517578125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.10798639670067492, "grad_norm": 0.12731987237930298, "learning_rate": 2e-07, "loss": 0.0235, "num_tokens": 743662783.0, "reward": 0.6071428656578064, "reward_std": 0.20467214286327362, "rewards/simpleverify_reward/mean": 0.6071428656578064, "rewards/simpleverify_reward/std": 0.48865827918052673, "step": 1157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3266.0, "completions/mean_length": 653.3627319335938, "completions/mean_terminated_length": 582.7847900390625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.1080797298006755, "grad_norm": 0.12102968990802765, "learning_rate": 2e-07, "loss": 0.0443, "num_tokens": 744332948.0, "reward": 0.6640625, "reward_std": 0.21214745938777924, "rewards/simpleverify_reward/mean": 0.6640625, "rewards/simpleverify_reward/std": 0.4725809693336487, "step": 1158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2052.0, "completions/mean_length": 589.099365234375, "completions/mean_terminated_length": 553.5162963867188, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.10817306290067608, "grad_norm": 0.12138650566339493, "learning_rate": 2e-07, "loss": 0.0197, "num_tokens": 744951341.0, "reward": 0.5602678656578064, "reward_std": 0.1867111176252365, "rewards/simpleverify_reward/mean": 0.5602678656578064, "rewards/simpleverify_reward/std": 0.4966317415237427, "step": 1159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3101.0, "completions/mean_length": 658.661865234375, "completions/mean_terminated_length": 572.138427734375, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.10826639600067667, "grad_norm": 0.1365383118391037, "learning_rate": 2e-07, "loss": 0.0406, "num_tokens": 745637310.0, "reward": 0.5424107313156128, "reward_std": 0.21466590464115143, "rewards/simpleverify_reward/mean": 0.5424107313156128, "rewards/simpleverify_reward/std": 0.4984763562679291, "step": 1160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 1863.0, "completions/mean_length": 613.4944458007812, "completions/mean_terminated_length": 574.1884765625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.10835972910067725, "grad_norm": 0.14086824655532837, "learning_rate": 2e-07, "loss": 0.0308, "num_tokens": 746282417.0, "reward": 0.6037946939468384, "reward_std": 0.22485215961933136, "rewards/simpleverify_reward/mean": 0.6037946343421936, "rewards/simpleverify_reward/std": 0.48938122391700745, "step": 1161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3617.0, "completions/mean_length": 665.8449096679688, "completions/mean_terminated_length": 615.34423828125, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.10845306220067784, "grad_norm": 0.1228458434343338, "learning_rate": 2e-07, "loss": 0.018, "num_tokens": 746975790.0, "reward": 0.5658482313156128, "reward_std": 0.2022647112607956, "rewards/simpleverify_reward/mean": 0.5658482313156128, "rewards/simpleverify_reward/std": 0.49592188000679016, "step": 1162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3676.0, "completions/mean_length": 605.9252319335938, "completions/mean_terminated_length": 554.54248046875, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.10854639530067842, "grad_norm": 0.119713194668293, "learning_rate": 2e-07, "loss": 0.0236, "num_tokens": 747599595.0, "reward": 0.6205357313156128, "reward_std": 0.16116195917129517, "rewards/simpleverify_reward/mean": 0.6205357313156128, "rewards/simpleverify_reward/std": 0.4855247139930725, "step": 1163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3925.0, "completions/mean_length": 630.734375, "completions/mean_terminated_length": 607.373046875, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.108639728400679, "grad_norm": 0.12448924779891968, "learning_rate": 2e-07, "loss": 0.0226, "num_tokens": 748260181.0, "reward": 0.5580357313156128, "reward_std": 0.1784769594669342, "rewards/simpleverify_reward/mean": 0.5580357313156128, "rewards/simpleverify_reward/std": 0.49689781665802, "step": 1164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2067.0, "completions/mean_length": 636.8917846679688, "completions/mean_terminated_length": 581.9852905273438, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.10873306150067959, "grad_norm": 0.126312717795372, "learning_rate": 2e-07, "loss": 0.0262, "num_tokens": 748917052.0, "reward": 0.6149553656578064, "reward_std": 0.20531335473060608, "rewards/simpleverify_reward/mean": 0.6149553656578064, "rewards/simpleverify_reward/std": 0.4868776500225067, "step": 1165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4013.0, "completions/mean_length": 648.7723388671875, "completions/mean_terminated_length": 605.9254150390625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.10882639460068017, "grad_norm": 0.12156440317630768, "learning_rate": 2e-07, "loss": 0.0297, "num_tokens": 749586280.0, "reward": 0.5803571939468384, "reward_std": 0.18937748670578003, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761425971985, "step": 1166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3304.0, "completions/mean_length": 564.5636596679688, "completions/mean_terminated_length": 540.7561645507812, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.10891972770068074, "grad_norm": 0.12063419073820114, "learning_rate": 2e-07, "loss": 0.0316, "num_tokens": 750176945.0, "reward": 0.6082589626312256, "reward_std": 0.18381401896476746, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.4884119927883148, "step": 1167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3463.0, "completions/mean_length": 646.0904541015625, "completions/mean_terminated_length": 595.2989501953125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.10901306080068134, "grad_norm": 0.11393984407186508, "learning_rate": 2e-07, "loss": 0.0508, "num_tokens": 750850418.0, "reward": 0.566964328289032, "reward_std": 0.20177824795246124, "rewards/simpleverify_reward/mean": 0.5669642686843872, "rewards/simpleverify_reward/std": 0.49577224254608154, "step": 1168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3680.0, "completions/mean_length": 642.4263916015625, "completions/mean_terminated_length": 583.6254272460938, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.10910639390068191, "grad_norm": 0.11879995465278625, "learning_rate": 2e-07, "loss": 0.0149, "num_tokens": 751516424.0, "reward": 0.546875, "reward_std": 0.19618089497089386, "rewards/simpleverify_reward/mean": 0.546875, "rewards/simpleverify_reward/std": 0.4980759024620056, "step": 1169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3082.0, "completions/mean_length": 645.8582763671875, "completions/mean_terminated_length": 599.0238037109375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.10919972700068249, "grad_norm": 0.12567496299743652, "learning_rate": 2e-07, "loss": 0.0402, "num_tokens": 752178729.0, "reward": 0.5546875, "reward_std": 0.188890740275383, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 1170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3383.0, "completions/mean_length": 607.8471069335938, "completions/mean_terminated_length": 572.4542846679688, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.10929306010068308, "grad_norm": 0.12694984674453735, "learning_rate": 2e-07, "loss": 0.0095, "num_tokens": 752807832.0, "reward": 0.6149553656578064, "reward_std": 0.17975644767284393, "rewards/simpleverify_reward/mean": 0.6149553656578064, "rewards/simpleverify_reward/std": 0.4868776500225067, "step": 1171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4000.0, "completions/mean_length": 613.1049194335938, "completions/mean_terminated_length": 557.8208618164062, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.10938639320068366, "grad_norm": 0.11755306273698807, "learning_rate": 2e-07, "loss": 0.0354, "num_tokens": 753443846.0, "reward": 0.5680803656578064, "reward_std": 0.1698736995458603, "rewards/simpleverify_reward/mean": 0.5680803656578064, "rewards/simpleverify_reward/std": 0.4956200420856476, "step": 1172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2507.0, "completions/mean_length": 609.864990234375, "completions/mean_terminated_length": 558.5401611328125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.10947972630068425, "grad_norm": 0.1304589956998825, "learning_rate": 2e-07, "loss": 0.0238, "num_tokens": 754073021.0, "reward": 0.6205357313156128, "reward_std": 0.1799500733613968, "rewards/simpleverify_reward/mean": 0.6205357313156128, "rewards/simpleverify_reward/std": 0.4855247139930725, "step": 1173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2680.0, "completions/mean_length": 610.6105346679688, "completions/mean_terminated_length": 575.2457275390625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.10957305940068483, "grad_norm": 0.12990857660770416, "learning_rate": 2e-07, "loss": 0.0297, "num_tokens": 754724024.0, "reward": 0.6194196939468384, "reward_std": 0.18532174825668335, "rewards/simpleverify_reward/mean": 0.6194196343421936, "rewards/simpleverify_reward/std": 0.48580074310302734, "step": 1174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2250.0, "completions/mean_length": 524.4420166015625, "completions/mean_terminated_length": 508.426025390625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.10966639250068541, "grad_norm": 0.14154276251792908, "learning_rate": 2e-07, "loss": 0.0087, "num_tokens": 755279740.0, "reward": 0.6595982313156128, "reward_std": 0.1925688087940216, "rewards/simpleverify_reward/mean": 0.6595982313156128, "rewards/simpleverify_reward/std": 0.4741089344024658, "step": 1175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2777.0, "completions/mean_length": 654.661865234375, "completions/mean_terminated_length": 603.99658203125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.109759725600686, "grad_norm": 0.09743129462003708, "learning_rate": 2e-07, "loss": 0.0316, "num_tokens": 755964413.0, "reward": 0.5580357313156128, "reward_std": 0.14289532601833344, "rewards/simpleverify_reward/mean": 0.5580357313156128, "rewards/simpleverify_reward/std": 0.49689781665802, "step": 1176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3464.0, "completions/mean_length": 691.1038208007812, "completions/mean_terminated_length": 613.3663940429688, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.10985305870068658, "grad_norm": 0.10793482512235641, "learning_rate": 2e-07, "loss": 0.0239, "num_tokens": 756683194.0, "reward": 0.5234375, "reward_std": 0.16078400611877441, "rewards/simpleverify_reward/mean": 0.5234375, "rewards/simpleverify_reward/std": 0.49972933530807495, "step": 1177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4049.0, "completions/mean_length": 549.8203125, "completions/mean_terminated_length": 505.7434997558594, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.10994639180068716, "grad_norm": 0.1368190199136734, "learning_rate": 2e-07, "loss": 0.0194, "num_tokens": 757263657.0, "reward": 0.6707589626312256, "reward_std": 0.1413230001926422, "rewards/simpleverify_reward/mean": 0.6707589030265808, "rewards/simpleverify_reward/std": 0.4702001214027405, "step": 1178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4064.0, "completions/mean_length": 656.9944458007812, "completions/mean_terminated_length": 598.4415893554688, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.11003972490068775, "grad_norm": 0.12319916486740112, "learning_rate": 2e-07, "loss": 0.0497, "num_tokens": 757944580.0, "reward": 0.6104910969734192, "reward_std": 0.2009081095457077, "rewards/simpleverify_reward/mean": 0.6104910969734192, "rewards/simpleverify_reward/std": 0.48791125416755676, "step": 1179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2480.0, "completions/mean_length": 577.9576416015625, "completions/mean_terminated_length": 554.240478515625, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.11013305800068833, "grad_norm": 0.13784824311733246, "learning_rate": 2e-07, "loss": 0.0373, "num_tokens": 758550646.0, "reward": 0.609375, "reward_std": 0.2085774540901184, "rewards/simpleverify_reward/mean": 0.609375, "rewards/simpleverify_reward/std": 0.48816296458244324, "step": 1180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 647.3426513671875, "completions/mean_terminated_length": 604.4779663085938, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.11022639110068891, "grad_norm": 0.1248989924788475, "learning_rate": 2e-07, "loss": 0.0212, "num_tokens": 759222825.0, "reward": 0.5926339626312256, "reward_std": 0.19535532593727112, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 1181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3995.0, "completions/mean_length": 643.747802734375, "completions/mean_terminated_length": 580.9795532226562, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.1103197242006895, "grad_norm": 0.13336779177188873, "learning_rate": 2e-07, "loss": 0.0122, "num_tokens": 759883927.0, "reward": 0.574776828289032, "reward_std": 0.2098262757062912, "rewards/simpleverify_reward/mean": 0.5747767686843872, "rewards/simpleverify_reward/std": 0.49465295672416687, "step": 1182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3800.0, "completions/mean_length": 659.5457763671875, "completions/mean_terminated_length": 628.5867309570312, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.11041305730069008, "grad_norm": 0.11741915345191956, "learning_rate": 2e-07, "loss": 0.0321, "num_tokens": 760567824.0, "reward": 0.5703125, "reward_std": 0.1953539252281189, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 1183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2423.0, "completions/mean_length": 604.8381958007812, "completions/mean_terminated_length": 557.4468383789062, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.11050639040069067, "grad_norm": 0.11618225276470184, "learning_rate": 2e-07, "loss": 0.0191, "num_tokens": 761195479.0, "reward": 0.6071428656578064, "reward_std": 0.16581936180591583, "rewards/simpleverify_reward/mean": 0.6071428656578064, "rewards/simpleverify_reward/std": 0.48865827918052673, "step": 1184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3129.0, "completions/mean_length": 594.3471069335938, "completions/mean_terminated_length": 538.7653198242188, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.11059972350069125, "grad_norm": 0.1329689919948578, "learning_rate": 2e-07, "loss": 0.0347, "num_tokens": 761815502.0, "reward": 0.6238839626312256, "reward_std": 0.1862974315881729, "rewards/simpleverify_reward/mean": 0.6238839030265808, "rewards/simpleverify_reward/std": 0.48468026518821716, "step": 1185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2878.0, "completions/mean_length": 612.3995971679688, "completions/mean_terminated_length": 561.1121215820312, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.11069305660069183, "grad_norm": 0.1341446340084076, "learning_rate": 2e-07, "loss": 0.0294, "num_tokens": 762457252.0, "reward": 0.6383928656578064, "reward_std": 0.20595458149909973, "rewards/simpleverify_reward/mean": 0.6383928656578064, "rewards/simpleverify_reward/std": 0.4807341992855072, "step": 1186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3183.0, "completions/mean_length": 655.0189819335938, "completions/mean_terminated_length": 604.3590087890625, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.11078638970069242, "grad_norm": 0.12247715145349503, "learning_rate": 2e-07, "loss": 0.0344, "num_tokens": 763131573.0, "reward": 0.566964328289032, "reward_std": 0.19201315939426422, "rewards/simpleverify_reward/mean": 0.5669642686843872, "rewards/simpleverify_reward/std": 0.49577224254608154, "step": 1187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3826.0, "completions/mean_length": 627.3795166015625, "completions/mean_terminated_length": 576.3125610351562, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 0.110879722800693, "grad_norm": 0.11695588380098343, "learning_rate": 2e-07, "loss": 0.0593, "num_tokens": 763779777.0, "reward": 0.5859375, "reward_std": 0.19340254366397858, "rewards/simpleverify_reward/mean": 0.5859375, "rewards/simpleverify_reward/std": 0.4928344786167145, "step": 1188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2700.0, "completions/mean_length": 579.8046875, "completions/mean_terminated_length": 540.1185302734375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.11097305590069358, "grad_norm": 0.13287648558616638, "learning_rate": 2e-07, "loss": 0.0281, "num_tokens": 764383330.0, "reward": 0.660714328289032, "reward_std": 0.20617099106311798, "rewards/simpleverify_reward/mean": 0.6607142686843872, "rewards/simpleverify_reward/std": 0.4737313687801361, "step": 1189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3206.0, "completions/mean_length": 621.4475708007812, "completions/mean_terminated_length": 574.2816772460938, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.11106638900069417, "grad_norm": 0.14028112590312958, "learning_rate": 2e-07, "loss": 0.0369, "num_tokens": 765038571.0, "reward": 0.5535714626312256, "reward_std": 0.18321627378463745, "rewards/simpleverify_reward/mean": 0.5535714030265808, "rewards/simpleverify_reward/std": 0.4973994791507721, "step": 1190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2660.0, "completions/mean_length": 589.8671875, "completions/mean_terminated_length": 566.2303466796875, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.11115972210069475, "grad_norm": 0.13686317205429077, "learning_rate": 2e-07, "loss": 0.0449, "num_tokens": 765650660.0, "reward": 0.6283482313156128, "reward_std": 0.20775331556797028, "rewards/simpleverify_reward/mean": 0.6283482313156128, "rewards/simpleverify_reward/std": 0.4835159480571747, "step": 1191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2565.0, "completions/mean_length": 540.6975708007812, "completions/mean_terminated_length": 520.7463989257812, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.11125305520069532, "grad_norm": 0.130348801612854, "learning_rate": 2e-07, "loss": 0.0299, "num_tokens": 766220429.0, "reward": 0.6305803656578064, "reward_std": 0.17401821911334991, "rewards/simpleverify_reward/mean": 0.6305803656578064, "rewards/simpleverify_reward/std": 0.4829172194004059, "step": 1192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3351.0, "completions/mean_length": 634.0357666015625, "completions/mean_terminated_length": 594.961669921875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.11134638830069592, "grad_norm": 0.12337139993906021, "learning_rate": 2e-07, "loss": 0.0255, "num_tokens": 766874165.0, "reward": 0.5926339626312256, "reward_std": 0.20418289303779602, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 1193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3859.0, "completions/mean_length": 637.8995971679688, "completions/mean_terminated_length": 594.9175415039062, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 0.1114397214006965, "grad_norm": 0.12425526231527328, "learning_rate": 2e-07, "loss": 0.0276, "num_tokens": 767531659.0, "reward": 0.5546875, "reward_std": 0.19497555494308472, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 1194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2180.0, "completions/mean_length": 577.1205444335938, "completions/mean_terminated_length": 533.383056640625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.11153305450069709, "grad_norm": 0.12817025184631348, "learning_rate": 2e-07, "loss": 0.0301, "num_tokens": 768136727.0, "reward": 0.6082589626312256, "reward_std": 0.16807454824447632, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.48841196298599243, "step": 1195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2704.0, "completions/mean_length": 636.1171875, "completions/mean_terminated_length": 553.0800170898438, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.11162638760069767, "grad_norm": 0.12749150395393372, "learning_rate": 2e-07, "loss": 0.0429, "num_tokens": 768790192.0, "reward": 0.5970982313156128, "reward_std": 0.18314141035079956, "rewards/simpleverify_reward/mean": 0.5970982313156128, "rewards/simpleverify_reward/std": 0.4907552897930145, "step": 1196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3677.0, "completions/mean_length": 616.552490234375, "completions/mean_terminated_length": 557.31103515625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.11171972070069824, "grad_norm": 0.127771258354187, "learning_rate": 2e-07, "loss": 0.0424, "num_tokens": 769433079.0, "reward": 0.6026785969734192, "reward_std": 0.18554744124412537, "rewards/simpleverify_reward/mean": 0.6026785969734192, "rewards/simpleverify_reward/std": 0.48961687088012695, "step": 1197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3912.0, "completions/mean_length": 605.8895263671875, "completions/mean_terminated_length": 554.5062255859375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.11181305380069884, "grad_norm": 0.12312200665473938, "learning_rate": 2e-07, "loss": 0.0231, "num_tokens": 770068756.0, "reward": 0.5915178656578064, "reward_std": 0.17600379884243011, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 1198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3649.0, "completions/mean_length": 608.2935791015625, "completions/mean_terminated_length": 536.7915649414062, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.11190638690069941, "grad_norm": 0.12863563001155853, "learning_rate": 2e-07, "loss": 0.0282, "num_tokens": 770708283.0, "reward": 0.640625, "reward_std": 0.16871324181556702, "rewards/simpleverify_reward/mean": 0.640625, "rewards/simpleverify_reward/std": 0.48008525371551514, "step": 1199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 581.6451416015625, "completions/mean_terminated_length": 553.9730224609375, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.11199972000069999, "grad_norm": 0.14044499397277832, "learning_rate": 2e-07, "loss": 0.0315, "num_tokens": 771324021.0, "reward": 0.6026785969734192, "reward_std": 0.1958431601524353, "rewards/simpleverify_reward/mean": 0.6026785969734192, "rewards/simpleverify_reward/std": 0.48961687088012695, "step": 1200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3677.0, "completions/mean_length": 598.5703125, "completions/mean_terminated_length": 563.0833740234375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.11209305310070058, "grad_norm": 0.12249316275119781, "learning_rate": 2e-07, "loss": 0.0296, "num_tokens": 771944652.0, "reward": 0.6227678656578064, "reward_std": 0.16717232763767242, "rewards/simpleverify_reward/mean": 0.6227678656578064, "rewards/simpleverify_reward/std": 0.4849644899368286, "step": 1201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4024.0, "completions/mean_length": 618.7924194335938, "completions/mean_terminated_length": 559.589111328125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.11218638620070116, "grad_norm": 0.12598787248134613, "learning_rate": 2e-07, "loss": 0.0185, "num_tokens": 772582082.0, "reward": 0.609375, "reward_std": 0.18441949784755707, "rewards/simpleverify_reward/mean": 0.609375, "rewards/simpleverify_reward/std": 0.48816296458244324, "step": 1202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2726.0, "completions/mean_length": 607.8482666015625, "completions/mean_terminated_length": 552.480712890625, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.11227971930070174, "grad_norm": 0.12348093092441559, "learning_rate": 2e-07, "loss": 0.0219, "num_tokens": 773211210.0, "reward": 0.613839328289032, "reward_std": 0.18367022275924683, "rewards/simpleverify_reward/mean": 0.6138392686843872, "rewards/simpleverify_reward/std": 0.48714008927345276, "step": 1203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2776.0, "completions/mean_length": 620.3995971679688, "completions/mean_terminated_length": 581.1715698242188, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.11237305240070233, "grad_norm": 0.12423436343669891, "learning_rate": 2e-07, "loss": 0.0154, "num_tokens": 773847960.0, "reward": 0.629464328289032, "reward_std": 0.1848759800195694, "rewards/simpleverify_reward/mean": 0.6294642686843872, "rewards/simpleverify_reward/std": 0.4832179844379425, "step": 1204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3008.0, "completions/mean_length": 612.247802734375, "completions/mean_terminated_length": 584.816650390625, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.11246638550070291, "grad_norm": 0.13648225367069244, "learning_rate": 2e-07, "loss": 0.0341, "num_tokens": 774481574.0, "reward": 0.6305803656578064, "reward_std": 0.2194698005914688, "rewards/simpleverify_reward/mean": 0.6305803656578064, "rewards/simpleverify_reward/std": 0.4829172194004059, "step": 1205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 593.9442138671875, "completions/mean_terminated_length": 558.4103393554688, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.1125597186007035, "grad_norm": 0.13137632608413696, "learning_rate": 2e-07, "loss": 0.0237, "num_tokens": 775096340.0, "reward": 0.6774553656578064, "reward_std": 0.18832579255104065, "rewards/simpleverify_reward/mean": 0.6774553656578064, "rewards/simpleverify_reward/std": 0.4677111804485321, "step": 1206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2858.0, "completions/mean_length": 660.5145263671875, "completions/mean_terminated_length": 625.6561279296875, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.11265305170070408, "grad_norm": 0.12305217236280441, "learning_rate": 2e-07, "loss": 0.0151, "num_tokens": 775772457.0, "reward": 0.5770089626312256, "reward_std": 0.1883269101381302, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099319934845, "step": 1207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3458.0, "completions/mean_length": 613.4252319335938, "completions/mean_terminated_length": 570.1389770507812, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.11274638480070466, "grad_norm": 0.12265150994062424, "learning_rate": 2e-07, "loss": 0.0395, "num_tokens": 776409614.0, "reward": 0.5959821939468384, "reward_std": 0.19234944880008698, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 1208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2141.0, "completions/mean_length": 578.7176513671875, "completions/mean_terminated_length": 535.0, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.11283971790070525, "grad_norm": 0.12379486858844757, "learning_rate": 2e-07, "loss": 0.0394, "num_tokens": 777016841.0, "reward": 0.6484375, "reward_std": 0.17506060004234314, "rewards/simpleverify_reward/mean": 0.6484375, "rewards/simpleverify_reward/std": 0.4777248501777649, "step": 1209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3891.0, "completions/mean_length": 647.5859375, "completions/mean_terminated_length": 592.8492431640625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.11293305100070583, "grad_norm": 0.12186974287033081, "learning_rate": 2e-07, "loss": 0.0284, "num_tokens": 777696110.0, "reward": 0.5915178656578064, "reward_std": 0.1803198605775833, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 1210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3111.0, "completions/mean_length": 661.5089721679688, "completions/mean_terminated_length": 630.5675659179688, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 0.11302638410070641, "grad_norm": 0.12309057265520096, "learning_rate": 2e-07, "loss": 0.0064, "num_tokens": 778375878.0, "reward": 0.5546875, "reward_std": 0.18419378995895386, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 1211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3875.0, "completions/mean_length": 635.6842041015625, "completions/mean_terminated_length": 580.7584838867188, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.113119717200707, "grad_norm": 0.12913279235363007, "learning_rate": 2e-07, "loss": 0.0282, "num_tokens": 779033139.0, "reward": 0.6305803656578064, "reward_std": 0.19355227053165436, "rewards/simpleverify_reward/mean": 0.6305803656578064, "rewards/simpleverify_reward/std": 0.4829172194004059, "step": 1212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3138.0, "completions/mean_length": 601.3705444335938, "completions/mean_terminated_length": 549.9207153320312, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.11321305030070758, "grad_norm": 0.12138450890779495, "learning_rate": 2e-07, "loss": 0.0289, "num_tokens": 779660759.0, "reward": 0.5926339626312256, "reward_std": 0.1639414280653, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 1213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4019.0, "completions/mean_length": 655.28125, "completions/mean_terminated_length": 612.5152587890625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.11330638340070817, "grad_norm": 0.12158390879631042, "learning_rate": 2e-07, "loss": 0.0211, "num_tokens": 780335675.0, "reward": 0.5948660969734192, "reward_std": 0.21016255021095276, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.491192102432251, "step": 1214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3375.0, "completions/mean_length": 637.3616333007812, "completions/mean_terminated_length": 586.441650390625, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.11339971650070875, "grad_norm": 0.12108080834150314, "learning_rate": 2e-07, "loss": 0.024, "num_tokens": 781003295.0, "reward": 0.6082589626312256, "reward_std": 0.20031191408634186, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.4884119927883148, "step": 1215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0033482142857143016, "completions/max_length": 4096.0, "completions/max_terminated_length": 3947.0, "completions/mean_length": 597.5703125, "completions/mean_terminated_length": 585.8175048828125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.11349304960070933, "grad_norm": 0.10842233896255493, "learning_rate": 2e-07, "loss": 0.0135, "num_tokens": 781621998.0, "reward": 0.5926339626312256, "reward_std": 0.14883995056152344, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 1216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3222.0, "completions/mean_length": 703.9319458007812, "completions/mean_terminated_length": 630.4435424804688, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.11358638270070992, "grad_norm": 0.10779372602701187, "learning_rate": 2e-07, "loss": 0.0156, "num_tokens": 782334953.0, "reward": 0.5803571939468384, "reward_std": 0.1803530752658844, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761425971985, "step": 1217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2531.0, "completions/mean_length": 627.7600708007812, "completions/mean_terminated_length": 592.5693359375, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.1136797158007105, "grad_norm": 0.12111151963472366, "learning_rate": 2e-07, "loss": 0.0193, "num_tokens": 782977618.0, "reward": 0.5948660969734192, "reward_std": 0.1756347119808197, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.49119213223457336, "step": 1218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2718.0, "completions/mean_length": 614.1395263671875, "completions/mean_terminated_length": 590.6663208007812, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.11377304890071108, "grad_norm": 0.13654902577400208, "learning_rate": 2e-07, "loss": 0.029, "num_tokens": 783614799.0, "reward": 0.6238839626312256, "reward_std": 0.22477297484874725, "rewards/simpleverify_reward/mean": 0.6238839030265808, "rewards/simpleverify_reward/std": 0.48468026518821716, "step": 1219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3300.0, "completions/mean_length": 592.8973388671875, "completions/mean_terminated_length": 561.3378295898438, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 0.11386638200071167, "grad_norm": 0.12279467284679413, "learning_rate": 2e-07, "loss": 0.0096, "num_tokens": 784227667.0, "reward": 0.6238839626312256, "reward_std": 0.14841923117637634, "rewards/simpleverify_reward/mean": 0.6238839030265808, "rewards/simpleverify_reward/std": 0.4846802353858948, "step": 1220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2467.0, "completions/mean_length": 639.5491333007812, "completions/mean_terminated_length": 584.684814453125, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.11395971510071225, "grad_norm": 0.11967703700065613, "learning_rate": 2e-07, "loss": 0.0395, "num_tokens": 784890879.0, "reward": 0.5870535969734192, "reward_std": 0.19696973264217377, "rewards/simpleverify_reward/mean": 0.5870535969734192, "rewards/simpleverify_reward/std": 0.49263837933540344, "step": 1221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1794.0, "completions/mean_length": 638.036865234375, "completions/mean_terminated_length": 606.884033203125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.11405304820071283, "grad_norm": 0.11740057915449142, "learning_rate": 2e-07, "loss": 0.0083, "num_tokens": 785550216.0, "reward": 0.566964328289032, "reward_std": 0.18013553321361542, "rewards/simpleverify_reward/mean": 0.5669642686843872, "rewards/simpleverify_reward/std": 0.49577224254608154, "step": 1222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2647.0, "completions/mean_length": 627.6283569335938, "completions/mean_terminated_length": 576.5651245117188, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.11414638130071342, "grad_norm": 0.12653380632400513, "learning_rate": 2e-07, "loss": 0.0274, "num_tokens": 786204579.0, "reward": 0.6283482313156128, "reward_std": 0.18201345205307007, "rewards/simpleverify_reward/mean": 0.6283482313156128, "rewards/simpleverify_reward/std": 0.4835159480571747, "step": 1223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3244.0, "completions/mean_length": 634.6350708007812, "completions/mean_terminated_length": 579.6927490234375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.114239714400714, "grad_norm": 0.12150687724351883, "learning_rate": 2e-07, "loss": 0.0345, "num_tokens": 786856108.0, "reward": 0.6350446939468384, "reward_std": 0.18667720258235931, "rewards/simpleverify_reward/mean": 0.6350446343421936, "rewards/simpleverify_reward/std": 0.4816865026950836, "step": 1224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2662.0, "completions/mean_length": 556.9642944335938, "completions/mean_terminated_length": 500.78912353515625, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.11433304750071459, "grad_norm": 0.1225874200463295, "learning_rate": 2e-07, "loss": -0.0031, "num_tokens": 787440700.0, "reward": 0.6540178656578064, "reward_std": 0.14124701917171478, "rewards/simpleverify_reward/mean": 0.6540178656578064, "rewards/simpleverify_reward/std": 0.475953072309494, "step": 1225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3254.0, "completions/mean_length": 627.880615234375, "completions/mean_terminated_length": 584.7740478515625, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.11442638060071517, "grad_norm": 0.16105642914772034, "learning_rate": 2e-07, "loss": 0.0124, "num_tokens": 788085169.0, "reward": 0.6328125, "reward_std": 0.15244358777999878, "rewards/simpleverify_reward/mean": 0.6328125, "rewards/simpleverify_reward/std": 0.48230743408203125, "step": 1226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3840.0, "completions/mean_length": 670.1517944335938, "completions/mean_terminated_length": 599.9180297851562, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.11451971370071574, "grad_norm": 0.11227365583181381, "learning_rate": 2e-07, "loss": 0.0353, "num_tokens": 788768481.0, "reward": 0.6227678656578064, "reward_std": 0.1942266821861267, "rewards/simpleverify_reward/mean": 0.6227678656578064, "rewards/simpleverify_reward/std": 0.4849644601345062, "step": 1227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2993.0, "completions/mean_length": 573.2801513671875, "completions/mean_terminated_length": 549.531494140625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.11461304680071634, "grad_norm": 0.13222280144691467, "learning_rate": 2e-07, "loss": 0.0288, "num_tokens": 789364380.0, "reward": 0.645089328289032, "reward_std": 0.17074385285377502, "rewards/simpleverify_reward/mean": 0.6450892686843872, "rewards/simpleverify_reward/std": 0.4787535071372986, "step": 1228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 705.7813110351562, "completions/mean_terminated_length": 663.6429443359375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.11470637990071691, "grad_norm": 0.1152879074215889, "learning_rate": 2e-07, "loss": 0.0291, "num_tokens": 790088096.0, "reward": 0.5859375, "reward_std": 0.1959180384874344, "rewards/simpleverify_reward/mean": 0.5859375, "rewards/simpleverify_reward/std": 0.4928344786167145, "step": 1229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2853.0, "completions/mean_length": 629.7567138671875, "completions/mean_terminated_length": 578.7247924804688, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.1147997130007175, "grad_norm": 0.11445927619934082, "learning_rate": 2e-07, "loss": 0.0197, "num_tokens": 790745118.0, "reward": 0.598214328289032, "reward_std": 0.17630550265312195, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053290486335754, "step": 1230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2886.0, "completions/mean_length": 642.8861694335938, "completions/mean_terminated_length": 592.0475463867188, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.11489304610071809, "grad_norm": 0.12012942135334015, "learning_rate": 2e-07, "loss": 0.0222, "num_tokens": 791416952.0, "reward": 0.5256696939468384, "reward_std": 0.18524505198001862, "rewards/simpleverify_reward/mean": 0.5256696343421936, "rewards/simpleverify_reward/std": 0.4996195137500763, "step": 1231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2486.0, "completions/mean_length": 585.2767944335938, "completions/mean_terminated_length": 561.6090087890625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.11498637920071866, "grad_norm": 0.138370081782341, "learning_rate": 2e-07, "loss": 0.0345, "num_tokens": 792023232.0, "reward": 0.6729910969734192, "reward_std": 0.21718069911003113, "rewards/simpleverify_reward/mean": 0.6729910969734192, "rewards/simpleverify_reward/std": 0.46938255429267883, "step": 1232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2369.0, "completions/mean_length": 614.0011596679688, "completions/mean_terminated_length": 574.700927734375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.11507971230071924, "grad_norm": 0.12456195056438446, "learning_rate": 2e-07, "loss": 0.0067, "num_tokens": 792668377.0, "reward": 0.59375, "reward_std": 0.1817506104707718, "rewards/simpleverify_reward/mean": 0.59375, "rewards/simpleverify_reward/std": 0.4914066195487976, "step": 1233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3316.0, "completions/mean_length": 618.9174194335938, "completions/mean_terminated_length": 571.7172241210938, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.11517304540071983, "grad_norm": 0.11249042302370071, "learning_rate": 2e-07, "loss": 0.0305, "num_tokens": 793301991.0, "reward": 0.6037946939468384, "reward_std": 0.17505809664726257, "rewards/simpleverify_reward/mean": 0.6037946343421936, "rewards/simpleverify_reward/std": 0.48938122391700745, "step": 1234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4023.0, "completions/mean_length": 569.6027221679688, "completions/mean_terminated_length": 529.8013916015625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.11526637850072041, "grad_norm": 0.1373307704925537, "learning_rate": 2e-07, "loss": 0.0228, "num_tokens": 793889531.0, "reward": 0.6227678656578064, "reward_std": 0.16769658029079437, "rewards/simpleverify_reward/mean": 0.6227678656578064, "rewards/simpleverify_reward/std": 0.4849644899368286, "step": 1235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3418.0, "completions/mean_length": 650.802490234375, "completions/mean_terminated_length": 611.9176025390625, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.115359711600721, "grad_norm": 0.12202195078134537, "learning_rate": 2e-07, "loss": 0.0363, "num_tokens": 794564818.0, "reward": 0.5658482313156128, "reward_std": 0.19914822280406952, "rewards/simpleverify_reward/mean": 0.5658482313156128, "rewards/simpleverify_reward/std": 0.49592188000679016, "step": 1236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2644.0, "completions/mean_length": 619.364990234375, "completions/mean_terminated_length": 576.1525268554688, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.11545304470072158, "grad_norm": 0.12383969873189926, "learning_rate": 2e-07, "loss": 0.0346, "num_tokens": 795215193.0, "reward": 0.5881696939468384, "reward_std": 0.18787044286727905, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924395978450775, "step": 1237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3742.0, "completions/mean_length": 601.4955444335938, "completions/mean_terminated_length": 566.038330078125, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.11554637780072216, "grad_norm": 0.12782709300518036, "learning_rate": 2e-07, "loss": 0.0191, "num_tokens": 795833469.0, "reward": 0.6964285969734192, "reward_std": 0.15808232128620148, "rewards/simpleverify_reward/mean": 0.6964285969734192, "rewards/simpleverify_reward/std": 0.4600565433502197, "step": 1238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3876.0, "completions/mean_length": 667.0647583007812, "completions/mean_terminated_length": 596.7677001953125, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 0.11563971090072275, "grad_norm": 0.1239503026008606, "learning_rate": 2e-07, "loss": 0.0458, "num_tokens": 796516439.0, "reward": 0.6183035969734192, "reward_std": 0.20906348526477814, "rewards/simpleverify_reward/mean": 0.6183035969734192, "rewards/simpleverify_reward/std": 0.4860740303993225, "step": 1239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3011.0, "completions/mean_length": 659.2433471679688, "completions/mean_terminated_length": 608.6455078125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.11573304400072333, "grad_norm": 0.13337722420692444, "learning_rate": 2e-07, "loss": 0.034, "num_tokens": 797200233.0, "reward": 0.606026828289032, "reward_std": 0.22871951758861542, "rewards/simpleverify_reward/mean": 0.6060267686843872, "rewards/simpleverify_reward/std": 0.48890194296836853, "step": 1240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2850.0, "completions/mean_length": 588.9296875, "completions/mean_terminated_length": 549.3464965820312, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.11582637710072391, "grad_norm": 0.11911056190729141, "learning_rate": 2e-07, "loss": 0.0197, "num_tokens": 797817466.0, "reward": 0.6183035969734192, "reward_std": 0.148537278175354, "rewards/simpleverify_reward/mean": 0.6183035969734192, "rewards/simpleverify_reward/std": 0.4860740303993225, "step": 1241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3154.0, "completions/mean_length": 646.359375, "completions/mean_terminated_length": 615.2815551757812, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.1159197102007245, "grad_norm": 0.12406861782073975, "learning_rate": 2e-07, "loss": 0.0204, "num_tokens": 798477372.0, "reward": 0.640625, "reward_std": 0.2010619044303894, "rewards/simpleverify_reward/mean": 0.640625, "rewards/simpleverify_reward/std": 0.48008525371551514, "step": 1242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2801.0, "completions/mean_length": 654.7142944335938, "completions/mean_terminated_length": 600.0906982421875, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.11601304330072508, "grad_norm": 0.11860467493534088, "learning_rate": 2e-07, "loss": 0.0472, "num_tokens": 799156556.0, "reward": 0.504464328289032, "reward_std": 0.18257686495780945, "rewards/simpleverify_reward/mean": 0.5044642686843872, "rewards/simpleverify_reward/std": 0.5002593398094177, "step": 1243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2404.0, "completions/mean_length": 580.9386596679688, "completions/mean_terminated_length": 553.260986328125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.11610637640072566, "grad_norm": 0.14656050503253937, "learning_rate": 2e-07, "loss": 0.0253, "num_tokens": 799772741.0, "reward": 0.621651828289032, "reward_std": 0.2170708328485489, "rewards/simpleverify_reward/mean": 0.6216517686843872, "rewards/simpleverify_reward/std": 0.485245943069458, "step": 1244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4069.0, "completions/mean_length": 642.4609375, "completions/mean_terminated_length": 579.6693115234375, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 0.11619970950072625, "grad_norm": 0.12833578884601593, "learning_rate": 2e-07, "loss": 0.0261, "num_tokens": 800445898.0, "reward": 0.5848214626312256, "reward_std": 0.20072560012340546, "rewards/simpleverify_reward/mean": 0.5848214030265808, "rewards/simpleverify_reward/std": 0.49302801489830017, "step": 1245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2584.0, "completions/mean_length": 635.872802734375, "completions/mean_terminated_length": 584.930908203125, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.11629304260072683, "grad_norm": 0.11817750334739685, "learning_rate": 2e-07, "loss": 0.0404, "num_tokens": 801111912.0, "reward": 0.5401785969734192, "reward_std": 0.19607071578502655, "rewards/simpleverify_reward/mean": 0.5401785969734192, "rewards/simpleverify_reward/std": 0.49866142868995667, "step": 1246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3599.0, "completions/mean_length": 604.5870971679688, "completions/mean_terminated_length": 573.1328735351562, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.11638637570072742, "grad_norm": 0.12968480587005615, "learning_rate": 2e-07, "loss": 0.0431, "num_tokens": 801743046.0, "reward": 0.6272321939468384, "reward_std": 0.18434280157089233, "rewards/simpleverify_reward/mean": 0.6272321343421936, "rewards/simpleverify_reward/std": 0.4838111698627472, "step": 1247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3991.0, "completions/mean_length": 580.4631958007812, "completions/mean_terminated_length": 536.7672729492188, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.116479708800728, "grad_norm": 0.13683609664440155, "learning_rate": 2e-07, "loss": 0.0317, "num_tokens": 802344357.0, "reward": 0.6852678656578064, "reward_std": 0.19913867115974426, "rewards/simpleverify_reward/mean": 0.6852678656578064, "rewards/simpleverify_reward/std": 0.46466848254203796, "step": 1248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2457.0, "completions/mean_length": 646.677490234375, "completions/mean_terminated_length": 615.6024780273438, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.11657304190072858, "grad_norm": 0.11594443768262863, "learning_rate": 2e-07, "loss": 0.0311, "num_tokens": 803011420.0, "reward": 0.5948660969734192, "reward_std": 0.18558277189731598, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.49119213223457336, "step": 1249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3979.0, "completions/mean_length": 663.4230346679688, "completions/mean_terminated_length": 593.05126953125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.11666637500072917, "grad_norm": 0.1425982564687729, "learning_rate": 2e-07, "loss": 0.0447, "num_tokens": 803699247.0, "reward": 0.590401828289032, "reward_std": 0.2468361258506775, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 1250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3765.0, "completions/mean_length": 643.0535888671875, "completions/mean_terminated_length": 596.1810302734375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.11675970810072975, "grad_norm": 0.12109268456697464, "learning_rate": 2e-07, "loss": 0.0132, "num_tokens": 804365431.0, "reward": 0.5736607313156128, "reward_std": 0.17844374477863312, "rewards/simpleverify_reward/mean": 0.5736607313156128, "rewards/simpleverify_reward/std": 0.4948205351829529, "step": 1251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3518.0, "completions/mean_length": 611.4017944335938, "completions/mean_terminated_length": 556.0906982421875, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.11685304120073033, "grad_norm": 0.12880602478981018, "learning_rate": 2e-07, "loss": 0.0262, "num_tokens": 804998279.0, "reward": 0.6305803656578064, "reward_std": 0.19607001543045044, "rewards/simpleverify_reward/mean": 0.6305803656578064, "rewards/simpleverify_reward/std": 0.4829172194004059, "step": 1252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3408.0, "completions/mean_length": 604.8638916015625, "completions/mean_terminated_length": 561.47119140625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.11694637430073092, "grad_norm": 0.12838159501552582, "learning_rate": 2e-07, "loss": 0.0171, "num_tokens": 805638309.0, "reward": 0.5725446939468384, "reward_std": 0.1737472116947174, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 1253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 1993.0, "completions/mean_length": 602.7522583007812, "completions/mean_terminated_length": 547.3038330078125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.1170397074007315, "grad_norm": 0.13448667526245117, "learning_rate": 2e-07, "loss": 0.0381, "num_tokens": 806274191.0, "reward": 0.621651828289032, "reward_std": 0.2128635197877884, "rewards/simpleverify_reward/mean": 0.6216517686843872, "rewards/simpleverify_reward/std": 0.4852459728717804, "step": 1254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3222.0, "completions/mean_length": 641.8192138671875, "completions/mean_terminated_length": 575.0147705078125, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.11713304050073207, "grad_norm": 0.1417413353919983, "learning_rate": 2e-07, "loss": 0.045, "num_tokens": 806937861.0, "reward": 0.640625, "reward_std": 0.1669459044933319, "rewards/simpleverify_reward/mean": 0.640625, "rewards/simpleverify_reward/std": 0.48008525371551514, "step": 1255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3649.0, "completions/mean_length": 619.638427734375, "completions/mean_terminated_length": 576.4293823242188, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.11722637360073267, "grad_norm": 0.11654700338840485, "learning_rate": 2e-07, "loss": 0.0094, "num_tokens": 807591433.0, "reward": 0.5524553656578064, "reward_std": 0.17329075932502747, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 1256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3601.0, "completions/mean_length": 606.2924194335938, "completions/mean_terminated_length": 550.9002075195312, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.11731970670073325, "grad_norm": 0.11060921102762222, "learning_rate": 2e-07, "loss": 0.011, "num_tokens": 808218711.0, "reward": 0.5970982313156128, "reward_std": 0.1420711725950241, "rewards/simpleverify_reward/mean": 0.5970982313156128, "rewards/simpleverify_reward/std": 0.4907552897930145, "step": 1257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2486.0, "completions/mean_length": 611.872802734375, "completions/mean_terminated_length": 592.321044921875, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.11741303980073384, "grad_norm": 0.12671327590942383, "learning_rate": 2e-07, "loss": 0.0311, "num_tokens": 808851493.0, "reward": 0.5792410969734192, "reward_std": 0.1884024739265442, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49395665526390076, "step": 1258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3228.0, "completions/mean_length": 625.3192138671875, "completions/mean_terminated_length": 582.1807861328125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.11750637290073442, "grad_norm": 0.1394948661327362, "learning_rate": 2e-07, "loss": 0.0213, "num_tokens": 809499147.0, "reward": 0.546875, "reward_std": 0.22969955205917358, "rewards/simpleverify_reward/mean": 0.546875, "rewards/simpleverify_reward/std": 0.4980759024620056, "step": 1259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2141.0, "completions/mean_length": 631.171875, "completions/mean_terminated_length": 592.0654907226562, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.117599706000735, "grad_norm": 0.14241690933704376, "learning_rate": 2e-07, "loss": 0.0416, "num_tokens": 810152029.0, "reward": 0.6026785969734192, "reward_std": 0.2278876006603241, "rewards/simpleverify_reward/mean": 0.6026785969734192, "rewards/simpleverify_reward/std": 0.48961687088012695, "step": 1260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2322.0, "completions/mean_length": 596.4542846679688, "completions/mean_terminated_length": 564.9268188476562, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.11769303910073559, "grad_norm": 0.1288197785615921, "learning_rate": 2e-07, "loss": 0.0204, "num_tokens": 810771692.0, "reward": 0.6205357313156128, "reward_std": 0.2029048055410385, "rewards/simpleverify_reward/mean": 0.6205357313156128, "rewards/simpleverify_reward/std": 0.4855247139930725, "step": 1261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2787.0, "completions/mean_length": 580.7467041015625, "completions/mean_terminated_length": 553.0675048828125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.11778637220073616, "grad_norm": 0.12602661550045013, "learning_rate": 2e-07, "loss": 0.0142, "num_tokens": 811378641.0, "reward": 0.625, "reward_std": 0.18096037209033966, "rewards/simpleverify_reward/mean": 0.625, "rewards/simpleverify_reward/std": 0.48439329862594604, "step": 1262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2158.0, "completions/mean_length": 595.1473388671875, "completions/mean_terminated_length": 563.6080932617188, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.11787970530073674, "grad_norm": 0.13474039733409882, "learning_rate": 2e-07, "loss": 0.0428, "num_tokens": 811988893.0, "reward": 0.6227678656578064, "reward_std": 0.19881263375282288, "rewards/simpleverify_reward/mean": 0.6227678656578064, "rewards/simpleverify_reward/std": 0.4849644899368286, "step": 1263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3598.0, "completions/mean_length": 660.5926513671875, "completions/mean_terminated_length": 594.1513061523438, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.11797303840073733, "grad_norm": 0.13145439326763153, "learning_rate": 2e-07, "loss": 0.0457, "num_tokens": 812666080.0, "reward": 0.5993303656578064, "reward_std": 0.22424373030662537, "rewards/simpleverify_reward/mean": 0.5993303656578064, "rewards/simpleverify_reward/std": 0.49030786752700806, "step": 1264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3982.0, "completions/mean_length": 721.2589721679688, "completions/mean_terminated_length": 616.4050903320312, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.11806637150073791, "grad_norm": 0.11728496849536896, "learning_rate": 2e-07, "loss": 0.0213, "num_tokens": 813398224.0, "reward": 0.527901828289032, "reward_std": 0.18870669603347778, "rewards/simpleverify_reward/mean": 0.5279017686843872, "rewards/simpleverify_reward/std": 0.49949970841407776, "step": 1265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3706.0, "completions/mean_length": 608.0022583007812, "completions/mean_terminated_length": 564.6486206054688, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.1181597046007385, "grad_norm": 0.1099834144115448, "learning_rate": 2e-07, "loss": -0.0023, "num_tokens": 814025698.0, "reward": 0.609375, "reward_std": 0.172130286693573, "rewards/simpleverify_reward/mean": 0.609375, "rewards/simpleverify_reward/std": 0.48816296458244324, "step": 1266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3018.0, "completions/mean_length": 632.0714721679688, "completions/mean_terminated_length": 600.8648681640625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.11825303770073908, "grad_norm": 0.12830586731433868, "learning_rate": 2e-07, "loss": 0.0222, "num_tokens": 814676178.0, "reward": 0.625, "reward_std": 0.2113219052553177, "rewards/simpleverify_reward/mean": 0.625, "rewards/simpleverify_reward/std": 0.48439329862594604, "step": 1267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3411.0, "completions/mean_length": 624.2824096679688, "completions/mean_terminated_length": 577.155029296875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.11834637080073966, "grad_norm": 0.12108606100082397, "learning_rate": 2e-07, "loss": 0.0232, "num_tokens": 815321223.0, "reward": 0.6194196939468384, "reward_std": 0.17920008301734924, "rewards/simpleverify_reward/mean": 0.6194196343421936, "rewards/simpleverify_reward/std": 0.48580074310302734, "step": 1268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3946.0, "completions/mean_length": 646.3828125, "completions/mean_terminated_length": 619.2205200195312, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 0.11843970390074025, "grad_norm": 0.12310416996479034, "learning_rate": 2e-07, "loss": 0.0232, "num_tokens": 815989662.0, "reward": 0.5948660969734192, "reward_std": 0.19764582812786102, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.49119213223457336, "step": 1269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3240.0, "completions/mean_length": 582.0792846679688, "completions/mean_terminated_length": 562.3602905273438, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.11853303700074083, "grad_norm": 0.1425761580467224, "learning_rate": 2e-07, "loss": 0.012, "num_tokens": 816599205.0, "reward": 0.6339285969734192, "reward_std": 0.2222888171672821, "rewards/simpleverify_reward/mean": 0.6339285969734192, "rewards/simpleverify_reward/std": 0.48199835419654846, "step": 1270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3772.0, "completions/mean_length": 629.7489013671875, "completions/mean_terminated_length": 566.7261352539062, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.11862637010074141, "grad_norm": 0.12219898402690887, "learning_rate": 2e-07, "loss": 0.0292, "num_tokens": 817259700.0, "reward": 0.5915178656578064, "reward_std": 0.17010192573070526, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 1271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3506.0, "completions/mean_length": 638.3013916015625, "completions/mean_terminated_length": 563.39111328125, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.118719703200742, "grad_norm": 0.11987047642469406, "learning_rate": 2e-07, "loss": 0.0433, "num_tokens": 817924410.0, "reward": 0.6049107313156128, "reward_std": 0.192794531583786, "rewards/simpleverify_reward/mean": 0.6049107313156128, "rewards/simpleverify_reward/std": 0.48914292454719543, "step": 1272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2576.0, "completions/mean_length": 619.2444458007812, "completions/mean_terminated_length": 564.057861328125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.11881303630074258, "grad_norm": 0.12485351413488388, "learning_rate": 2e-07, "loss": 0.016, "num_tokens": 818566021.0, "reward": 0.6037946939468384, "reward_std": 0.16837625205516815, "rewards/simpleverify_reward/mean": 0.6037946343421936, "rewards/simpleverify_reward/std": 0.48938122391700745, "step": 1273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3775.0, "completions/mean_length": 641.2756958007812, "completions/mean_terminated_length": 570.4498901367188, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.11890636940074316, "grad_norm": 0.1252390593290329, "learning_rate": 2e-07, "loss": 0.0214, "num_tokens": 819230292.0, "reward": 0.5959821939468384, "reward_std": 0.20914408564567566, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 1274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2748.0, "completions/mean_length": 662.4721069335938, "completions/mean_terminated_length": 576.0446166992188, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.11899970250074375, "grad_norm": 0.13983292877674103, "learning_rate": 2e-07, "loss": 0.0407, "num_tokens": 819927107.0, "reward": 0.5569196939468384, "reward_std": 0.22462210059165955, "rewards/simpleverify_reward/mean": 0.5569196343421936, "rewards/simpleverify_reward/std": 0.49702703952789307, "step": 1275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3534.0, "completions/mean_length": 635.4710083007812, "completions/mean_terminated_length": 604.2950439453125, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.11909303560074433, "grad_norm": 0.11964929103851318, "learning_rate": 2e-07, "loss": 0.0229, "num_tokens": 820582073.0, "reward": 0.6116071939468384, "reward_std": 0.17333604395389557, "rewards/simpleverify_reward/mean": 0.6116071343421936, "rewards/simpleverify_reward/std": 0.48765692114830017, "step": 1276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3713.0, "completions/mean_length": 647.0267944335938, "completions/mean_terminated_length": 619.8695678710938, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.11918636870074492, "grad_norm": 0.12051428109407425, "learning_rate": 2e-07, "loss": 0.0251, "num_tokens": 821255137.0, "reward": 0.5970982313156128, "reward_std": 0.1927606165409088, "rewards/simpleverify_reward/mean": 0.5970982313156128, "rewards/simpleverify_reward/std": 0.49075525999069214, "step": 1277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3511.0, "completions/mean_length": 627.7824096679688, "completions/mean_terminated_length": 596.5371704101562, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.1192797018007455, "grad_norm": 0.1340145319700241, "learning_rate": 2e-07, "loss": 0.029, "num_tokens": 821905422.0, "reward": 0.590401828289032, "reward_std": 0.23326560854911804, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 1278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3948.0, "completions/mean_length": 642.1116333007812, "completions/mean_terminated_length": 599.1819458007812, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.11937303490074608, "grad_norm": 0.12167755514383316, "learning_rate": 2e-07, "loss": 0.025, "num_tokens": 822564650.0, "reward": 0.6316964626312256, "reward_std": 0.16860447824001312, "rewards/simpleverify_reward/mean": 0.6316964030265808, "rewards/simpleverify_reward/std": 0.4826137125492096, "step": 1279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3921.0, "completions/mean_length": 657.8560791015625, "completions/mean_terminated_length": 595.3442993164062, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.11946636800074667, "grad_norm": 0.13061466813087463, "learning_rate": 2e-07, "loss": 0.0293, "num_tokens": 823239609.0, "reward": 0.5647321939468384, "reward_std": 0.22090013325214386, "rewards/simpleverify_reward/mean": 0.5647321343421936, "rewards/simpleverify_reward/std": 0.49606895446777344, "step": 1280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3880.0, "completions/mean_length": 684.2734985351562, "completions/mean_terminated_length": 610.3591918945312, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.11955970110074725, "grad_norm": 0.11987433582544327, "learning_rate": 2e-07, "loss": 0.0208, "num_tokens": 823934982.0, "reward": 0.5703125, "reward_std": 0.19813159108161926, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 1281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3829.0, "completions/mean_length": 647.724365234375, "completions/mean_terminated_length": 589.013671875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.11965303420074783, "grad_norm": 0.1365022361278534, "learning_rate": 2e-07, "loss": 0.0356, "num_tokens": 824603807.0, "reward": 0.5770089626312256, "reward_std": 0.2021891474723816, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099617958069, "step": 1282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2768.0, "completions/mean_length": 605.3035888671875, "completions/mean_terminated_length": 565.9052124023438, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.11974636730074842, "grad_norm": 0.12956158816814423, "learning_rate": 2e-07, "loss": 0.0274, "num_tokens": 825243911.0, "reward": 0.6238839626312256, "reward_std": 0.21387244760990143, "rewards/simpleverify_reward/mean": 0.6238839030265808, "rewards/simpleverify_reward/std": 0.48468026518821716, "step": 1283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3625.0, "completions/mean_length": 662.8314819335938, "completions/mean_terminated_length": 600.4102172851562, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 0.119839700400749, "grad_norm": 0.12065350264310837, "learning_rate": 2e-07, "loss": 0.0225, "num_tokens": 825937328.0, "reward": 0.5524553656578064, "reward_std": 0.1762281209230423, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 1284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3989.0, "completions/mean_length": 585.1339721679688, "completions/mean_terminated_length": 549.5106811523438, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.11993303350074958, "grad_norm": 0.1288270652294159, "learning_rate": 2e-07, "loss": 0.0048, "num_tokens": 826559960.0, "reward": 0.6071428656578064, "reward_std": 0.1932421177625656, "rewards/simpleverify_reward/mean": 0.6071428656578064, "rewards/simpleverify_reward/std": 0.48865827918052673, "step": 1285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3430.0, "completions/mean_length": 631.6373291015625, "completions/mean_terminated_length": 604.3588256835938, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.12002636660075017, "grad_norm": 0.12980197370052338, "learning_rate": 2e-07, "loss": 0.0296, "num_tokens": 827217419.0, "reward": 0.5457589626312256, "reward_std": 0.20072488486766815, "rewards/simpleverify_reward/mean": 0.5457589030265808, "rewards/simpleverify_reward/std": 0.4981798231601715, "step": 1286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3979.0, "completions/mean_length": 599.1027221679688, "completions/mean_terminated_length": 539.5641479492188, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.12011969970075075, "grad_norm": 0.1339821219444275, "learning_rate": 2e-07, "loss": 0.046, "num_tokens": 827840007.0, "reward": 0.645089328289032, "reward_std": 0.18798880279064178, "rewards/simpleverify_reward/mean": 0.6450892686843872, "rewards/simpleverify_reward/std": 0.4787535071372986, "step": 1287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2247.0, "completions/mean_length": 562.6239013671875, "completions/mean_terminated_length": 518.7062377929688, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.12021303280075134, "grad_norm": 0.13939377665519714, "learning_rate": 2e-07, "loss": 0.0445, "num_tokens": 828420110.0, "reward": 0.6227678656578064, "reward_std": 0.20989085733890533, "rewards/simpleverify_reward/mean": 0.6227678656578064, "rewards/simpleverify_reward/std": 0.4849644601345062, "step": 1288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2510.0, "completions/mean_length": 640.0480346679688, "completions/mean_terminated_length": 585.1915893554688, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.12030636590075192, "grad_norm": 0.13602396845817566, "learning_rate": 2e-07, "loss": 0.0256, "num_tokens": 829084489.0, "reward": 0.6361607313156128, "reward_std": 0.2141762375831604, "rewards/simpleverify_reward/mean": 0.6361607313156128, "rewards/simpleverify_reward/std": 0.4813718795776367, "step": 1289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3355.0, "completions/mean_length": 617.3303833007812, "completions/mean_terminated_length": 582.0338134765625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.1203996990007525, "grad_norm": 0.12453526258468628, "learning_rate": 2e-07, "loss": 0.0365, "num_tokens": 829729321.0, "reward": 0.5814732313156128, "reward_std": 0.2008325457572937, "rewards/simpleverify_reward/mean": 0.5814732313156128, "rewards/simpleverify_reward/std": 0.4935929775238037, "step": 1290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4067.0, "completions/mean_length": 619.96875, "completions/mean_terminated_length": 560.7855224609375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.12049303210075309, "grad_norm": 0.12715065479278564, "learning_rate": 2e-07, "loss": 0.0308, "num_tokens": 830379653.0, "reward": 0.5870535969734192, "reward_std": 0.19032247364521027, "rewards/simpleverify_reward/mean": 0.5870535969734192, "rewards/simpleverify_reward/std": 0.49263837933540344, "step": 1291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2307.0, "completions/mean_length": 650.6328125, "completions/mean_terminated_length": 579.9989013671875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.12058636520075366, "grad_norm": 0.11268657445907593, "learning_rate": 2e-07, "loss": 0.019, "num_tokens": 831050444.0, "reward": 0.6205357313156128, "reward_std": 0.17908315360546112, "rewards/simpleverify_reward/mean": 0.6205357313156128, "rewards/simpleverify_reward/std": 0.4855247139930725, "step": 1292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3784.0, "completions/mean_length": 648.8303833007812, "completions/mean_terminated_length": 609.9232788085938, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.12067969830075424, "grad_norm": 0.12002041190862656, "learning_rate": 2e-07, "loss": 0.011, "num_tokens": 831720220.0, "reward": 0.5803571939468384, "reward_std": 0.18077632784843445, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761127948761, "step": 1293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3540.0, "completions/mean_length": 664.9989013671875, "completions/mean_terminated_length": 590.6670532226562, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.12077303140075484, "grad_norm": 0.12053172290325165, "learning_rate": 2e-07, "loss": 0.0175, "num_tokens": 832398635.0, "reward": 0.5926339626312256, "reward_std": 0.16134923696517944, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 1294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3087.0, "completions/mean_length": 670.7310791015625, "completions/mean_terminated_length": 628.1571044921875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.12086636450075541, "grad_norm": 0.12406682223081589, "learning_rate": 2e-07, "loss": 0.027, "num_tokens": 833092170.0, "reward": 0.5647321939468384, "reward_std": 0.21207258105278015, "rewards/simpleverify_reward/mean": 0.5647321343421936, "rewards/simpleverify_reward/std": 0.49606895446777344, "step": 1295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3172.0, "completions/mean_length": 571.3671875, "completions/mean_terminated_length": 527.5582275390625, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.12095969760075599, "grad_norm": 0.13819728791713715, "learning_rate": 2e-07, "loss": 0.0298, "num_tokens": 833688763.0, "reward": 0.6886160969734192, "reward_std": 0.19896669685840607, "rewards/simpleverify_reward/mean": 0.6886160969734192, "rewards/simpleverify_reward/std": 0.46331799030303955, "step": 1296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2125.0, "completions/mean_length": 587.3739013671875, "completions/mean_terminated_length": 543.7638549804688, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.12105303070075658, "grad_norm": 0.12885859608650208, "learning_rate": 2e-07, "loss": 0.0306, "num_tokens": 834299930.0, "reward": 0.6305803656578064, "reward_std": 0.19749216735363007, "rewards/simpleverify_reward/mean": 0.6305803656578064, "rewards/simpleverify_reward/std": 0.4829172194004059, "step": 1297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 651.7511596679688, "completions/mean_terminated_length": 561.0092163085938, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.12114636380075716, "grad_norm": 0.11645719408988953, "learning_rate": 2e-07, "loss": 0.016, "num_tokens": 834970451.0, "reward": 0.6272321939468384, "reward_std": 0.15379837155342102, "rewards/simpleverify_reward/mean": 0.6272321343421936, "rewards/simpleverify_reward/std": 0.4838111400604248, "step": 1298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3589.0, "completions/mean_length": 637.9642944335938, "completions/mean_terminated_length": 575.0908813476562, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 0.12123969690075775, "grad_norm": 0.13494428992271423, "learning_rate": 2e-07, "loss": 0.0157, "num_tokens": 835631331.0, "reward": 0.590401828289032, "reward_std": 0.20459838211536407, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 1299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2503.0, "completions/mean_length": 676.5625, "completions/mean_terminated_length": 630.1448364257812, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.12133303000075833, "grad_norm": 0.12610860168933868, "learning_rate": 2e-07, "loss": 0.0351, "num_tokens": 836330059.0, "reward": 0.5256696939468384, "reward_std": 0.1949741542339325, "rewards/simpleverify_reward/mean": 0.5256696343421936, "rewards/simpleverify_reward/std": 0.4996195435523987, "step": 1300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2242.0, "completions/mean_length": 582.5033569335938, "completions/mean_terminated_length": 562.7868041992188, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.12142636310075891, "grad_norm": 0.1379399448633194, "learning_rate": 2e-07, "loss": 0.0325, "num_tokens": 836938542.0, "reward": 0.6540178656578064, "reward_std": 0.18352049589157104, "rewards/simpleverify_reward/mean": 0.6540178656578064, "rewards/simpleverify_reward/std": 0.4759531021118164, "step": 1301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3352.0, "completions/mean_length": 676.3326416015625, "completions/mean_terminated_length": 610.1956787109375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.1215196962007595, "grad_norm": 0.11602798104286194, "learning_rate": 2e-07, "loss": 0.0133, "num_tokens": 837623760.0, "reward": 0.5602678656578064, "reward_std": 0.18216389417648315, "rewards/simpleverify_reward/mean": 0.5602678656578064, "rewards/simpleverify_reward/std": 0.4966317117214203, "step": 1302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3704.0, "completions/mean_length": 691.224365234375, "completions/mean_terminated_length": 617.4606323242188, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 0.12161302930076008, "grad_norm": 0.12646476924419403, "learning_rate": 2e-07, "loss": 0.0233, "num_tokens": 838336985.0, "reward": 0.5502232313156128, "reward_std": 0.20418430864810944, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 1303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3938.0, "completions/mean_length": 561.716552734375, "completions/mean_terminated_length": 537.889892578125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.12170636240076066, "grad_norm": 0.12023667991161346, "learning_rate": 2e-07, "loss": 0.0299, "num_tokens": 838930227.0, "reward": 0.606026828289032, "reward_std": 0.16003583371639252, "rewards/simpleverify_reward/mean": 0.6060267686843872, "rewards/simpleverify_reward/std": 0.48890194296836853, "step": 1304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3786.0, "completions/mean_length": 643.5245971679688, "completions/mean_terminated_length": 576.7531127929688, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.12179969550076125, "grad_norm": 0.12733688950538635, "learning_rate": 2e-07, "loss": 0.0155, "num_tokens": 839599345.0, "reward": 0.6082589626312256, "reward_std": 0.1934332251548767, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.4884119927883148, "step": 1305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3748.0, "completions/mean_length": 630.2835083007812, "completions/mean_terminated_length": 599.0608520507812, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.12189302860076183, "grad_norm": 0.12182847410440445, "learning_rate": 2e-07, "loss": 0.036, "num_tokens": 840246903.0, "reward": 0.6238839626312256, "reward_std": 0.1947900950908661, "rewards/simpleverify_reward/mean": 0.6238839030265808, "rewards/simpleverify_reward/std": 0.48468026518821716, "step": 1306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2620.0, "completions/mean_length": 596.0580444335938, "completions/mean_terminated_length": 572.4629516601562, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.12198636170076241, "grad_norm": 0.12303058058023453, "learning_rate": 2e-07, "loss": 0.0285, "num_tokens": 840871795.0, "reward": 0.609375, "reward_std": 0.18884685635566711, "rewards/simpleverify_reward/mean": 0.609375, "rewards/simpleverify_reward/std": 0.48816296458244324, "step": 1307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2291.0, "completions/mean_length": 643.3627319335938, "completions/mean_terminated_length": 580.5874633789062, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.122079694800763, "grad_norm": 0.12957428395748138, "learning_rate": 2e-07, "loss": 0.0113, "num_tokens": 841525480.0, "reward": 0.5758928656578064, "reward_std": 0.18704882264137268, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49448275566101074, "step": 1308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 1751.0, "completions/mean_length": 606.1205444335938, "completions/mean_terminated_length": 570.710205078125, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.12217302790076358, "grad_norm": 0.1344517022371292, "learning_rate": 2e-07, "loss": 0.0168, "num_tokens": 842160772.0, "reward": 0.6160714626312256, "reward_std": 0.20944830775260925, "rewards/simpleverify_reward/mean": 0.6160714030265808, "rewards/simpleverify_reward/std": 0.486612468957901, "step": 1309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 1916.0, "completions/mean_length": 562.5178833007812, "completions/mean_terminated_length": 538.6966552734375, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.12226636100076417, "grad_norm": 0.11305554211139679, "learning_rate": 2e-07, "loss": 0.0195, "num_tokens": 842749724.0, "reward": 0.6339285969734192, "reward_std": 0.1580723375082016, "rewards/simpleverify_reward/mean": 0.6339285969734192, "rewards/simpleverify_reward/std": 0.48199835419654846, "step": 1310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3126.0, "completions/mean_length": 627.4921875, "completions/mean_terminated_length": 588.34423828125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.12235969410076475, "grad_norm": 0.13462217152118683, "learning_rate": 2e-07, "loss": 0.0252, "num_tokens": 843399021.0, "reward": 0.559151828289032, "reward_std": 0.20287089049816132, "rewards/simpleverify_reward/mean": 0.5591517686843872, "rewards/simpleverify_reward/std": 0.496766060590744, "step": 1311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3639.0, "completions/mean_length": 645.2935791015625, "completions/mean_terminated_length": 594.4903564453125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.12245302720076533, "grad_norm": 0.12324226647615433, "learning_rate": 2e-07, "loss": 0.0211, "num_tokens": 844068716.0, "reward": 0.6071428656578064, "reward_std": 0.19017162919044495, "rewards/simpleverify_reward/mean": 0.6071428656578064, "rewards/simpleverify_reward/std": 0.48865827918052673, "step": 1312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2444.0, "completions/mean_length": 593.6183471679688, "completions/mean_terminated_length": 558.0811157226562, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.12254636030076592, "grad_norm": 0.13976357877254486, "learning_rate": 2e-07, "loss": 0.0173, "num_tokens": 844694478.0, "reward": 0.6037946939468384, "reward_std": 0.2043362706899643, "rewards/simpleverify_reward/mean": 0.6037946343421936, "rewards/simpleverify_reward/std": 0.48938122391700745, "step": 1313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3845.0, "completions/mean_length": 664.4486694335938, "completions/mean_terminated_length": 606.022705078125, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.1226396934007665, "grad_norm": 0.12893414497375488, "learning_rate": 2e-07, "loss": 0.0437, "num_tokens": 845388080.0, "reward": 0.551339328289032, "reward_std": 0.20786388218402863, "rewards/simpleverify_reward/mean": 0.5513392686843872, "rewards/simpleverify_reward/std": 0.4976350665092468, "step": 1314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2624.0, "completions/mean_length": 659.7589721679688, "completions/mean_terminated_length": 609.168701171875, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.12273302650076708, "grad_norm": 0.11944909393787384, "learning_rate": 2e-07, "loss": 0.0321, "num_tokens": 846077512.0, "reward": 0.5703125, "reward_std": 0.18381541967391968, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 1315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3628.0, "completions/mean_length": 573.3482666015625, "completions/mean_terminated_length": 545.61083984375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.12282635960076767, "grad_norm": 0.1204969584941864, "learning_rate": 2e-07, "loss": 0.0126, "num_tokens": 846677960.0, "reward": 0.6383928656578064, "reward_std": 0.14951187372207642, "rewards/simpleverify_reward/mean": 0.6383928656578064, "rewards/simpleverify_reward/std": 0.4807341992855072, "step": 1316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2876.0, "completions/mean_length": 619.125, "completions/mean_terminated_length": 567.9365844726562, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.12291969270076825, "grad_norm": 0.12668249011039734, "learning_rate": 2e-07, "loss": 0.0343, "num_tokens": 847319656.0, "reward": 0.6350446939468384, "reward_std": 0.1856580376625061, "rewards/simpleverify_reward/mean": 0.6350446343421936, "rewards/simpleverify_reward/std": 0.481686532497406, "step": 1317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3511.0, "completions/mean_length": 626.875, "completions/mean_terminated_length": 583.7559204101562, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.12301302580076884, "grad_norm": 0.1362314224243164, "learning_rate": 2e-07, "loss": 0.0224, "num_tokens": 847963616.0, "reward": 0.6127232313156128, "reward_std": 0.21887390315532684, "rewards/simpleverify_reward/mean": 0.6127232313156128, "rewards/simpleverify_reward/std": 0.4873998463153839, "step": 1318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2760.0, "completions/mean_length": 574.1373291015625, "completions/mean_terminated_length": 546.4060668945312, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.12310635890076942, "grad_norm": 0.12621919810771942, "learning_rate": 2e-07, "loss": 0.0181, "num_tokens": 848558611.0, "reward": 0.6238839626312256, "reward_std": 0.18825021386146545, "rewards/simpleverify_reward/mean": 0.6238839030265808, "rewards/simpleverify_reward/std": 0.4846802353858948, "step": 1319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3983.0, "completions/mean_length": 674.3225708007812, "completions/mean_terminated_length": 616.0647583007812, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.12319969200077, "grad_norm": 0.12599647045135498, "learning_rate": 2e-07, "loss": 0.0548, "num_tokens": 849249476.0, "reward": 0.5926339626312256, "reward_std": 0.19633355736732483, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 1320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4002.0, "completions/mean_length": 635.2489013671875, "completions/mean_terminated_length": 568.3173828125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.12329302510077059, "grad_norm": 0.13631154596805573, "learning_rate": 2e-07, "loss": 0.0288, "num_tokens": 849900411.0, "reward": 0.6194196939468384, "reward_std": 0.16273680329322815, "rewards/simpleverify_reward/mean": 0.6194196343421936, "rewards/simpleverify_reward/std": 0.48580074310302734, "step": 1321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2403.0, "completions/mean_length": 612.7857666015625, "completions/mean_terminated_length": 561.5039672851562, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.12338635820077117, "grad_norm": 0.12428365647792816, "learning_rate": 2e-07, "loss": 0.0271, "num_tokens": 850536667.0, "reward": 0.5546875, "reward_std": 0.18953153491020203, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 1322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3835.0, "completions/mean_length": 692.7667846679688, "completions/mean_terminated_length": 607.101806640625, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.12347969130077174, "grad_norm": 0.12557530403137207, "learning_rate": 2e-07, "loss": 0.0437, "num_tokens": 851246458.0, "reward": 0.5915178656578064, "reward_std": 0.20775288343429565, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 1323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4061.0, "completions/mean_length": 662.8114013671875, "completions/mean_terminated_length": 600.3897705078125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.12357302440077234, "grad_norm": 0.10938151180744171, "learning_rate": 2e-07, "loss": 0.0226, "num_tokens": 851938273.0, "reward": 0.5959821939468384, "reward_std": 0.1626594066619873, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 1324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3250.0, "completions/mean_length": 608.9855346679688, "completions/mean_terminated_length": 553.6360473632812, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.12366635750077291, "grad_norm": 0.13824404776096344, "learning_rate": 2e-07, "loss": 0.0282, "num_tokens": 852584268.0, "reward": 0.590401828289032, "reward_std": 0.20625585317611694, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 1325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 631.9096069335938, "completions/mean_terminated_length": 580.9093627929688, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.12375969060077349, "grad_norm": 0.12294593453407288, "learning_rate": 2e-07, "loss": 0.0248, "num_tokens": 853236163.0, "reward": 0.59375, "reward_std": 0.18265357613563538, "rewards/simpleverify_reward/mean": 0.59375, "rewards/simpleverify_reward/std": 0.4914066195487976, "step": 1326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3498.0, "completions/mean_length": 682.5435791015625, "completions/mean_terminated_length": 604.6107177734375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.12385302370077408, "grad_norm": 0.1163269504904747, "learning_rate": 2e-07, "loss": 0.022, "num_tokens": 853943962.0, "reward": 0.5558035969734192, "reward_std": 0.15728957951068878, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715369939804077, "step": 1327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2989.0, "completions/mean_length": 602.640625, "completions/mean_terminated_length": 571.1689453125, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.12394635680077466, "grad_norm": 0.1248951405286789, "learning_rate": 2e-07, "loss": 0.0175, "num_tokens": 854577216.0, "reward": 0.6071428656578064, "reward_std": 0.15416815876960754, "rewards/simpleverify_reward/mean": 0.6071428656578064, "rewards/simpleverify_reward/std": 0.48865827918052673, "step": 1328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2333.0, "completions/mean_length": 555.2824096679688, "completions/mean_terminated_length": 531.412353515625, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.12403968990077525, "grad_norm": 0.14561721682548523, "learning_rate": 2e-07, "loss": 0.0162, "num_tokens": 855166757.0, "reward": 0.5870535969734192, "reward_std": 0.2130482792854309, "rewards/simpleverify_reward/mean": 0.5870535969734192, "rewards/simpleverify_reward/std": 0.49263837933540344, "step": 1329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3676.0, "completions/mean_length": 699.2031860351562, "completions/mean_terminated_length": 613.7001953125, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.12413302300077583, "grad_norm": 0.10150649398565292, "learning_rate": 2e-07, "loss": 0.0267, "num_tokens": 855873299.0, "reward": 0.609375, "reward_std": 0.17649096250534058, "rewards/simpleverify_reward/mean": 0.609375, "rewards/simpleverify_reward/std": 0.48816296458244324, "step": 1330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3449.0, "completions/mean_length": 600.2332763671875, "completions/mean_terminated_length": 552.7794189453125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.12422635610077641, "grad_norm": 0.1161896362900734, "learning_rate": 2e-07, "loss": 0.0206, "num_tokens": 856492932.0, "reward": 0.6428571939468384, "reward_std": 0.16003401577472687, "rewards/simpleverify_reward/mean": 0.6428571343421936, "rewards/simpleverify_reward/std": 0.4794250428676605, "step": 1331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3932.0, "completions/mean_length": 700.505615234375, "completions/mean_terminated_length": 634.836181640625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.124319689200777, "grad_norm": 0.12217621505260468, "learning_rate": 2e-07, "loss": 0.042, "num_tokens": 857206625.0, "reward": 0.5424107313156128, "reward_std": 0.1994456797838211, "rewards/simpleverify_reward/mean": 0.5424107313156128, "rewards/simpleverify_reward/std": 0.4984763562679291, "step": 1332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2282.0, "completions/mean_length": 614.0904541015625, "completions/mean_terminated_length": 566.82470703125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.12441302230077758, "grad_norm": 0.11676912754774094, "learning_rate": 2e-07, "loss": 0.029, "num_tokens": 857840778.0, "reward": 0.6584821939468384, "reward_std": 0.16499312222003937, "rewards/simpleverify_reward/mean": 0.6584821343421936, "rewards/simpleverify_reward/std": 0.4744836091995239, "step": 1333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3369.0, "completions/mean_length": 638.1361694335938, "completions/mean_terminated_length": 583.2494506835938, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.12450635540077816, "grad_norm": 0.1263483315706253, "learning_rate": 2e-07, "loss": 0.0274, "num_tokens": 858499124.0, "reward": 0.668526828289032, "reward_std": 0.15857158601284027, "rewards/simpleverify_reward/mean": 0.6685267686843872, "rewards/simpleverify_reward/std": 0.4710056483745575, "step": 1334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2518.0, "completions/mean_length": 618.2232666015625, "completions/mean_terminated_length": 586.8919067382812, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.12459968850077875, "grad_norm": 0.1255546510219574, "learning_rate": 2e-07, "loss": 0.0166, "num_tokens": 859138572.0, "reward": 0.6662946939468384, "reward_std": 0.19422627985477448, "rewards/simpleverify_reward/mean": 0.6662946343421936, "rewards/simpleverify_reward/std": 0.47179922461509705, "step": 1335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2516.0, "completions/mean_length": 654.2533569335938, "completions/mean_terminated_length": 627.1530151367188, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.12469302160077933, "grad_norm": 0.13070261478424072, "learning_rate": 2e-07, "loss": 0.0135, "num_tokens": 859814215.0, "reward": 0.6015625, "reward_std": 0.2083926945924759, "rewards/simpleverify_reward/mean": 0.6015625, "rewards/simpleverify_reward/std": 0.48984986543655396, "step": 1336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3998.0, "completions/mean_length": 645.75, "completions/mean_terminated_length": 598.9140625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.12478635470077991, "grad_norm": 0.11148051917552948, "learning_rate": 2e-07, "loss": 0.0236, "num_tokens": 860474399.0, "reward": 0.6227678656578064, "reward_std": 0.16919927299022675, "rewards/simpleverify_reward/mean": 0.6227678656578064, "rewards/simpleverify_reward/std": 0.4849644899368286, "step": 1337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2673.0, "completions/mean_length": 650.0480346679688, "completions/mean_terminated_length": 607.2169799804688, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.1248796878007805, "grad_norm": 0.12295583635568619, "learning_rate": 2e-07, "loss": 0.0295, "num_tokens": 861153402.0, "reward": 0.5770089626312256, "reward_std": 0.19625546038150787, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099319934845, "step": 1338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3871.0, "completions/mean_length": 687.2031860351562, "completions/mean_terminated_length": 648.7291259765625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.12497302090078108, "grad_norm": 0.11266667395830154, "learning_rate": 2e-07, "loss": 0.0331, "num_tokens": 861854192.0, "reward": 0.5256696939468384, "reward_std": 0.18989881873130798, "rewards/simpleverify_reward/mean": 0.5256696343421936, "rewards/simpleverify_reward/std": 0.4996195435523987, "step": 1339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3840.0, "completions/mean_length": 630.5189819335938, "completions/mean_terminated_length": 547.347412109375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.12506635400078167, "grad_norm": 0.1356811225414276, "learning_rate": 2e-07, "loss": 0.0298, "num_tokens": 862524721.0, "reward": 0.5390625, "reward_std": 0.2039603888988495, "rewards/simpleverify_reward/mean": 0.5390625, "rewards/simpleverify_reward/std": 0.4987502098083496, "step": 1340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3123.0, "completions/mean_length": 618.7957763671875, "completions/mean_terminated_length": 587.4696044921875, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.12515968710078224, "grad_norm": 0.1339723765850067, "learning_rate": 2e-07, "loss": 0.0348, "num_tokens": 863155002.0, "reward": 0.6484375, "reward_std": 0.20523668825626373, "rewards/simpleverify_reward/mean": 0.6484375, "rewards/simpleverify_reward/std": 0.4777248501777649, "step": 1341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3749.0, "completions/mean_length": 640.6975708007812, "completions/mean_terminated_length": 589.8267211914062, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.12525302020078283, "grad_norm": 0.1205964982509613, "learning_rate": 2e-07, "loss": 0.0331, "num_tokens": 863815195.0, "reward": 0.5848214626312256, "reward_std": 0.16717344522476196, "rewards/simpleverify_reward/mean": 0.5848214030265808, "rewards/simpleverify_reward/std": 0.49302801489830017, "step": 1342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2706.0, "completions/mean_length": 557.8248291015625, "completions/mean_terminated_length": 529.9651489257812, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.12534635330078342, "grad_norm": 0.13859859108924866, "learning_rate": 2e-07, "loss": 0.0193, "num_tokens": 864405574.0, "reward": 0.640625, "reward_std": 0.17810744047164917, "rewards/simpleverify_reward/mean": 0.640625, "rewards/simpleverify_reward/std": 0.48008525371551514, "step": 1343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2850.0, "completions/mean_length": 590.9520263671875, "completions/mean_terminated_length": 567.3224487304688, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.125439686400784, "grad_norm": 0.12944215536117554, "learning_rate": 2e-07, "loss": 0.0166, "num_tokens": 865025851.0, "reward": 0.6305803656578064, "reward_std": 0.18137265741825104, "rewards/simpleverify_reward/mean": 0.6305803656578064, "rewards/simpleverify_reward/std": 0.48291724920272827, "step": 1344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3981.0, "completions/mean_length": 661.3248291015625, "completions/mean_terminated_length": 586.913330078125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.12553301950078458, "grad_norm": 0.11690637469291687, "learning_rate": 2e-07, "loss": 0.0273, "num_tokens": 865710102.0, "reward": 0.6127232313156128, "reward_std": 0.1684529334306717, "rewards/simpleverify_reward/mean": 0.6127232313156128, "rewards/simpleverify_reward/std": 0.4873998463153839, "step": 1345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 605.888427734375, "completions/mean_terminated_length": 562.5084838867188, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.12562635260078517, "grad_norm": 0.13340604305267334, "learning_rate": 2e-07, "loss": 0.0134, "num_tokens": 866333954.0, "reward": 0.6729910969734192, "reward_std": 0.17074204981327057, "rewards/simpleverify_reward/mean": 0.6729910969734192, "rewards/simpleverify_reward/std": 0.46938255429267883, "step": 1346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3595.0, "completions/mean_length": 632.5892944335938, "completions/mean_terminated_length": 609.240478515625, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.12571968570078576, "grad_norm": 0.12096016108989716, "learning_rate": 2e-07, "loss": 0.0207, "num_tokens": 866981546.0, "reward": 0.6428571939468384, "reward_std": 0.17690393328666687, "rewards/simpleverify_reward/mean": 0.6428571343421936, "rewards/simpleverify_reward/std": 0.4794250428676605, "step": 1347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3397.0, "completions/mean_length": 714.9107666015625, "completions/mean_terminated_length": 625.832763671875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.12581301880078632, "grad_norm": 0.11940786987543106, "learning_rate": 2e-07, "loss": 0.0266, "num_tokens": 867717714.0, "reward": 0.5245535969734192, "reward_std": 0.17784644663333893, "rewards/simpleverify_reward/mean": 0.5245535969734192, "rewards/simpleverify_reward/std": 0.4996756911277771, "step": 1348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2667.0, "completions/mean_length": 670.7879638671875, "completions/mean_terminated_length": 604.5437622070312, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.12590635190078692, "grad_norm": 0.09989530593156815, "learning_rate": 2e-07, "loss": 0.0389, "num_tokens": 868403828.0, "reward": 0.5892857313156128, "reward_std": 0.16450344026088715, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 1349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2590.0, "completions/mean_length": 625.9866333007812, "completions/mean_terminated_length": 566.9058227539062, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.1259996850007875, "grad_norm": 0.127846360206604, "learning_rate": 2e-07, "loss": 0.0552, "num_tokens": 869048208.0, "reward": 0.6026785969734192, "reward_std": 0.20760315656661987, "rewards/simpleverify_reward/mean": 0.6026785969734192, "rewards/simpleverify_reward/std": 0.48961687088012695, "step": 1350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3647.0, "completions/mean_length": 646.2734375, "completions/mean_terminated_length": 595.4846801757812, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.12609301810078807, "grad_norm": 0.12237011641263962, "learning_rate": 2e-07, "loss": 0.0175, "num_tokens": 869713917.0, "reward": 0.6049107313156128, "reward_std": 0.1699499785900116, "rewards/simpleverify_reward/mean": 0.6049107313156128, "rewards/simpleverify_reward/std": 0.48914292454719543, "step": 1351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3153.0, "completions/mean_length": 595.2455444335938, "completions/mean_terminated_length": 535.641357421875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.12618635120078867, "grad_norm": 0.13634943962097168, "learning_rate": 2e-07, "loss": 0.0405, "num_tokens": 870330281.0, "reward": 0.6395089626312256, "reward_std": 0.20613569021224976, "rewards/simpleverify_reward/mean": 0.6395089030265808, "rewards/simpleverify_reward/std": 0.4804111421108246, "step": 1352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3973.0, "completions/mean_length": 654.796875, "completions/mean_terminated_length": 592.2295532226562, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.12627968430078926, "grad_norm": 0.12783609330654144, "learning_rate": 2e-07, "loss": 0.0299, "num_tokens": 871010211.0, "reward": 0.6026785969734192, "reward_std": 0.1981743425130844, "rewards/simpleverify_reward/mean": 0.6026785969734192, "rewards/simpleverify_reward/std": 0.48961687088012695, "step": 1353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3117.0, "completions/mean_length": 580.8158569335938, "completions/mean_terminated_length": 541.14111328125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.12637301740078982, "grad_norm": 0.13319523632526398, "learning_rate": 2e-07, "loss": 0.022, "num_tokens": 871614734.0, "reward": 0.613839328289032, "reward_std": 0.1793106645345688, "rewards/simpleverify_reward/mean": 0.6138392686843872, "rewards/simpleverify_reward/std": 0.48714008927345276, "step": 1354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3912.0, "completions/mean_length": 680.0279541015625, "completions/mean_terminated_length": 606.0216674804688, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.12646635050079041, "grad_norm": 0.1217617392539978, "learning_rate": 2e-07, "loss": 0.0283, "num_tokens": 872309623.0, "reward": 0.5546875, "reward_std": 0.1769353449344635, "rewards/simpleverify_reward/mean": 0.5546875, "rewards/simpleverify_reward/std": 0.4972778558731079, "step": 1355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3617.0, "completions/mean_length": 697.1484985351562, "completions/mean_terminated_length": 619.549072265625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.126559683600791, "grad_norm": 0.11779668927192688, "learning_rate": 2e-07, "loss": 0.0204, "num_tokens": 873024004.0, "reward": 0.5502232313156128, "reward_std": 0.19091911613941193, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49774909019470215, "step": 1356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3924.0, "completions/mean_length": 620.786865234375, "completions/mean_terminated_length": 593.4229736328125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.12665301670079157, "grad_norm": 0.14539766311645508, "learning_rate": 2e-07, "loss": 0.035, "num_tokens": 873670701.0, "reward": 0.6015625, "reward_std": 0.2170708328485489, "rewards/simpleverify_reward/mean": 0.6015625, "rewards/simpleverify_reward/std": 0.48984986543655396, "step": 1357 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3773.0, "completions/mean_length": 741.5078735351562, "completions/mean_terminated_length": 657.0697631835938, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.12674634980079216, "grad_norm": 0.11097297817468643, "learning_rate": 2e-07, "loss": 0.045, "num_tokens": 874415772.0, "reward": 0.5613839626312256, "reward_std": 0.18288180232048035, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 1358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3468.0, "completions/mean_length": 650.9866333007812, "completions/mean_terminated_length": 572.3333129882812, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.12683968290079276, "grad_norm": 0.12179583311080933, "learning_rate": 2e-07, "loss": 0.0197, "num_tokens": 875082616.0, "reward": 0.609375, "reward_std": 0.19993029534816742, "rewards/simpleverify_reward/mean": 0.609375, "rewards/simpleverify_reward/std": 0.48816296458244324, "step": 1359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2600.0, "completions/mean_length": 599.2277221679688, "completions/mean_terminated_length": 567.7252197265625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.12693301600079332, "grad_norm": 0.13678593933582306, "learning_rate": 2e-07, "loss": 0.0039, "num_tokens": 875727508.0, "reward": 0.582589328289032, "reward_std": 0.19208984076976776, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.4934072494506836, "step": 1360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3870.0, "completions/mean_length": 607.3125, "completions/mean_terminated_length": 575.8828735351562, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.1270263491007939, "grad_norm": 0.1211053878068924, "learning_rate": 2e-07, "loss": 0.0035, "num_tokens": 876362556.0, "reward": 0.5647321939468384, "reward_std": 0.17649094760417938, "rewards/simpleverify_reward/mean": 0.5647321343421936, "rewards/simpleverify_reward/std": 0.49606895446777344, "step": 1361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2402.0, "completions/mean_length": 614.0881958007812, "completions/mean_terminated_length": 566.8224487304688, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.1271196822007945, "grad_norm": 0.1168784499168396, "learning_rate": 2e-07, "loss": 0.0195, "num_tokens": 876998115.0, "reward": 0.6328125, "reward_std": 0.172803595662117, "rewards/simpleverify_reward/mean": 0.6328125, "rewards/simpleverify_reward/std": 0.48230743408203125, "step": 1362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4024.0, "completions/mean_length": 618.7835083007812, "completions/mean_terminated_length": 591.4038696289062, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.12721301530079507, "grad_norm": 0.1264144629240036, "learning_rate": 2e-07, "loss": 0.0328, "num_tokens": 877640905.0, "reward": 0.6238839626312256, "reward_std": 0.1762627214193344, "rewards/simpleverify_reward/mean": 0.6238839030265808, "rewards/simpleverify_reward/std": 0.48468026518821716, "step": 1363 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3786.0, "completions/mean_length": 661.0971069335938, "completions/mean_terminated_length": 622.3284301757812, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.12730634840079566, "grad_norm": 0.12367057055234909, "learning_rate": 2e-07, "loss": 0.044, "num_tokens": 878326944.0, "reward": 0.578125, "reward_std": 0.20038677752017975, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 1364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2204.0, "completions/mean_length": 619.203125, "completions/mean_terminated_length": 587.880615234375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.12739968150079625, "grad_norm": 0.1386454999446869, "learning_rate": 2e-07, "loss": 0.0055, "num_tokens": 878977854.0, "reward": 0.6015625, "reward_std": 0.20564855635166168, "rewards/simpleverify_reward/mean": 0.6015625, "rewards/simpleverify_reward/std": 0.48984986543655396, "step": 1365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3449.0, "completions/mean_length": 662.4074096679688, "completions/mean_terminated_length": 596.0010986328125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.12749301460079684, "grad_norm": 0.13934928178787231, "learning_rate": 2e-07, "loss": 0.0295, "num_tokens": 879655611.0, "reward": 0.640625, "reward_std": 0.22420983016490936, "rewards/simpleverify_reward/mean": 0.640625, "rewards/simpleverify_reward/std": 0.48008525371551514, "step": 1366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3842.0, "completions/mean_length": 674.0267944335938, "completions/mean_terminated_length": 611.80908203125, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.1275863477007974, "grad_norm": 0.11752640455961227, "learning_rate": 2e-07, "loss": 0.0185, "num_tokens": 880343787.0, "reward": 0.6149553656578064, "reward_std": 0.1631597876548767, "rewards/simpleverify_reward/mean": 0.6149553656578064, "rewards/simpleverify_reward/std": 0.4868776500225067, "step": 1367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3306.0, "completions/mean_length": 639.3013916015625, "completions/mean_terminated_length": 576.4522705078125, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.127679680800798, "grad_norm": 0.12642894685268402, "learning_rate": 2e-07, "loss": 0.0217, "num_tokens": 881008961.0, "reward": 0.5948660969734192, "reward_std": 0.1998247504234314, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.49119213223457336, "step": 1368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3649.0, "completions/mean_length": 683.802490234375, "completions/mean_terminated_length": 621.762451171875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.1277730139007986, "grad_norm": 0.12408287078142166, "learning_rate": 2e-07, "loss": 0.0145, "num_tokens": 881722544.0, "reward": 0.5368303656578064, "reward_std": 0.20729824900627136, "rewards/simpleverify_reward/mean": 0.5368303656578064, "rewards/simpleverify_reward/std": 0.49892017245292664, "step": 1369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2584.0, "completions/mean_length": 657.8783569335938, "completions/mean_terminated_length": 603.3049926757812, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.12786634700079916, "grad_norm": 0.1244690790772438, "learning_rate": 2e-07, "loss": 0.0353, "num_tokens": 882399707.0, "reward": 0.5814732313156128, "reward_std": 0.18922732770442963, "rewards/simpleverify_reward/mean": 0.5814732313156128, "rewards/simpleverify_reward/std": 0.4935929775238037, "step": 1370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3838.0, "completions/mean_length": 628.0848388671875, "completions/mean_terminated_length": 584.9807739257812, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.12795968010079975, "grad_norm": 0.10279800742864609, "learning_rate": 2e-07, "loss": 0.0257, "num_tokens": 883059087.0, "reward": 0.6082589626312256, "reward_std": 0.137300044298172, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.48841196298599243, "step": 1371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 658.7600708007812, "completions/mean_terminated_length": 608.1551513671875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.12805301320080034, "grad_norm": 0.11646929383277893, "learning_rate": 2e-07, "loss": 0.0338, "num_tokens": 883735432.0, "reward": 0.637276828289032, "reward_std": 0.16943003237247467, "rewards/simpleverify_reward/mean": 0.6372767686843872, "rewards/simpleverify_reward/std": 0.481054425239563, "step": 1372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3546.0, "completions/mean_length": 653.7745971679688, "completions/mean_terminated_length": 607.0475463867188, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.1281463463008009, "grad_norm": 0.1151922345161438, "learning_rate": 2e-07, "loss": 0.0309, "num_tokens": 884410654.0, "reward": 0.5636160969734192, "reward_std": 0.17555803060531616, "rewards/simpleverify_reward/mean": 0.5636160969734192, "rewards/simpleverify_reward/std": 0.49621346592903137, "step": 1373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3880.0, "completions/mean_length": 658.0748291015625, "completions/mean_terminated_length": 591.584716796875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.1282396794008015, "grad_norm": 0.14940036833286285, "learning_rate": 2e-07, "loss": 0.0369, "num_tokens": 885094961.0, "reward": 0.5993303656578064, "reward_std": 0.19738228619098663, "rewards/simpleverify_reward/mean": 0.5993303656578064, "rewards/simpleverify_reward/std": 0.49030786752700806, "step": 1374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3913.0, "completions/mean_length": 606.505615234375, "completions/mean_terminated_length": 567.1207885742188, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.1283330125008021, "grad_norm": 0.12954175472259521, "learning_rate": 2e-07, "loss": 0.0407, "num_tokens": 885717118.0, "reward": 0.668526828289032, "reward_std": 0.17153619229793549, "rewards/simpleverify_reward/mean": 0.6685267686843872, "rewards/simpleverify_reward/std": 0.4710056483745575, "step": 1375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3261.0, "completions/mean_length": 664.0569458007812, "completions/mean_terminated_length": 621.4000244140625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.12842634560080265, "grad_norm": 0.11930208653211594, "learning_rate": 2e-07, "loss": 0.0209, "num_tokens": 886402473.0, "reward": 0.5658482313156128, "reward_std": 0.17495183646678925, "rewards/simpleverify_reward/mean": 0.5658482313156128, "rewards/simpleverify_reward/std": 0.49592188000679016, "step": 1376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3639.0, "completions/mean_length": 619.341552734375, "completions/mean_terminated_length": 591.96630859375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.12851967870080325, "grad_norm": 0.11839012056589127, "learning_rate": 2e-07, "loss": 0.0288, "num_tokens": 887047147.0, "reward": 0.5959821939468384, "reward_std": 0.16894076764583588, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 1377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3096.0, "completions/mean_length": 589.7366333007812, "completions/mean_terminated_length": 530.0386352539062, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.12861301180080384, "grad_norm": 0.10701869428157806, "learning_rate": 2e-07, "loss": 0.0228, "num_tokens": 887664071.0, "reward": 0.6707589626312256, "reward_std": 0.14376294612884521, "rewards/simpleverify_reward/mean": 0.6707589030265808, "rewards/simpleverify_reward/std": 0.4702001214027405, "step": 1378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3611.0, "completions/mean_length": 657.1272583007812, "completions/mean_terminated_length": 578.6141357421875, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.1287063449008044, "grad_norm": 0.14175699651241302, "learning_rate": 2e-07, "loss": 0.0591, "num_tokens": 888339577.0, "reward": 0.6383928656578064, "reward_std": 0.22150886058807373, "rewards/simpleverify_reward/mean": 0.6383928656578064, "rewards/simpleverify_reward/std": 0.4807341992855072, "step": 1379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0033482142857143016, "completions/max_length": 4096.0, "completions/max_terminated_length": 2797.0, "completions/mean_length": 598.7522583007812, "completions/mean_terminated_length": 587.0033569335938, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.128799678000805, "grad_norm": 0.12549327313899994, "learning_rate": 2e-07, "loss": 0.0181, "num_tokens": 888969275.0, "reward": 0.6328125, "reward_std": 0.17356063425540924, "rewards/simpleverify_reward/mean": 0.6328125, "rewards/simpleverify_reward/std": 0.48230743408203125, "step": 1380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2397.0, "completions/mean_length": 604.771240234375, "completions/mean_terminated_length": 569.3472290039062, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.1288930111008056, "grad_norm": 0.13395601511001587, "learning_rate": 2e-07, "loss": 0.0344, "num_tokens": 889603574.0, "reward": 0.6462053656578064, "reward_std": 0.2025703340768814, "rewards/simpleverify_reward/mean": 0.6462053656578064, "rewards/simpleverify_reward/std": 0.478413462638855, "step": 1381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3964.0, "completions/mean_length": 599.3939819335938, "completions/mean_terminated_length": 559.9288940429688, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.12898634420080615, "grad_norm": 0.11713235080242157, "learning_rate": 2e-07, "loss": 0.0402, "num_tokens": 890226135.0, "reward": 0.6339285969734192, "reward_std": 0.18160006403923035, "rewards/simpleverify_reward/mean": 0.6339285969734192, "rewards/simpleverify_reward/std": 0.48199835419654846, "step": 1382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3387.0, "completions/mean_length": 639.138427734375, "completions/mean_terminated_length": 600.1218872070312, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.12907967730080674, "grad_norm": 0.12661172449588776, "learning_rate": 2e-07, "loss": 0.0421, "num_tokens": 890883819.0, "reward": 0.6015625, "reward_std": 0.208502858877182, "rewards/simpleverify_reward/mean": 0.6015625, "rewards/simpleverify_reward/std": 0.48984986543655396, "step": 1383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4037.0, "completions/mean_length": 669.552490234375, "completions/mean_terminated_length": 626.9638671875, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.12917301040080734, "grad_norm": 0.12823480367660522, "learning_rate": 2e-07, "loss": 0.0363, "num_tokens": 891584074.0, "reward": 0.5725446939468384, "reward_std": 0.21124592423439026, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 1384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3475.0, "completions/mean_length": 596.109375, "completions/mean_terminated_length": 568.5512084960938, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.1292663435008079, "grad_norm": 0.1237480491399765, "learning_rate": 2e-07, "loss": -0.0044, "num_tokens": 892211780.0, "reward": 0.6227678656578064, "reward_std": 0.1715315878391266, "rewards/simpleverify_reward/mean": 0.6227678656578064, "rewards/simpleverify_reward/std": 0.4849644899368286, "step": 1385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3658.0, "completions/mean_length": 671.2254638671875, "completions/mean_terminated_length": 612.9149169921875, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 0.1293596766008085, "grad_norm": 0.12146637588739395, "learning_rate": 2e-07, "loss": 0.0207, "num_tokens": 892898350.0, "reward": 0.6261160969734192, "reward_std": 0.19242431223392487, "rewards/simpleverify_reward/mean": 0.6261160969734192, "rewards/simpleverify_reward/std": 0.48410359025001526, "step": 1386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3627.0, "completions/mean_length": 682.357177734375, "completions/mean_terminated_length": 612.3735961914062, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.12945300970080909, "grad_norm": 0.12013179063796997, "learning_rate": 2e-07, "loss": 0.0216, "num_tokens": 893599526.0, "reward": 0.5814732313156128, "reward_std": 0.1742025464773178, "rewards/simpleverify_reward/mean": 0.5814732313156128, "rewards/simpleverify_reward/std": 0.4935929775238037, "step": 1387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3129.0, "completions/mean_length": 675.193115234375, "completions/mean_terminated_length": 620.8945922851562, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.12954634280080968, "grad_norm": 0.13046297430992126, "learning_rate": 2e-07, "loss": 0.0358, "num_tokens": 894280467.0, "reward": 0.621651828289032, "reward_std": 0.1829584687948227, "rewards/simpleverify_reward/mean": 0.6216517686843872, "rewards/simpleverify_reward/std": 0.4852459728717804, "step": 1388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2685.0, "completions/mean_length": 658.5346069335938, "completions/mean_terminated_length": 603.9716796875, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 0.12963967590081024, "grad_norm": 0.12049389630556107, "learning_rate": 2e-07, "loss": 0.022, "num_tokens": 894962002.0, "reward": 0.5803571939468384, "reward_std": 0.18352048099040985, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761425971985, "step": 1389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2444.0, "completions/mean_length": 600.8058471679688, "completions/mean_terminated_length": 557.3627319335938, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.12973300900081083, "grad_norm": 0.1226712092757225, "learning_rate": 2e-07, "loss": 0.033, "num_tokens": 895592644.0, "reward": 0.6662946939468384, "reward_std": 0.1890418827533722, "rewards/simpleverify_reward/mean": 0.6662946343421936, "rewards/simpleverify_reward/std": 0.47179925441741943, "step": 1390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 1946.0, "completions/mean_length": 617.1160888671875, "completions/mean_terminated_length": 569.8914184570312, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.12982634210081143, "grad_norm": 0.1311003714799881, "learning_rate": 2e-07, "loss": 0.0194, "num_tokens": 896228852.0, "reward": 0.5915178656578064, "reward_std": 0.18772777915000916, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 1391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3410.0, "completions/mean_length": 707.0670166015625, "completions/mean_terminated_length": 637.5900268554688, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.129919675200812, "grad_norm": 0.11693765223026276, "learning_rate": 2e-07, "loss": 0.03, "num_tokens": 896947416.0, "reward": 0.559151828289032, "reward_std": 0.21571533381938934, "rewards/simpleverify_reward/mean": 0.5591517686843872, "rewards/simpleverify_reward/std": 0.496766060590744, "step": 1392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3587.0, "completions/mean_length": 637.6373291015625, "completions/mean_terminated_length": 590.6912231445312, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.13001300830081258, "grad_norm": 0.1310821920633316, "learning_rate": 2e-07, "loss": 0.0481, "num_tokens": 897605003.0, "reward": 0.6015625, "reward_std": 0.2235340029001236, "rewards/simpleverify_reward/mean": 0.6015625, "rewards/simpleverify_reward/std": 0.48984986543655396, "step": 1393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3442.0, "completions/mean_length": 591.2745971679688, "completions/mean_terminated_length": 563.6782836914062, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.13010634140081317, "grad_norm": 0.13613159954547882, "learning_rate": 2e-07, "loss": 0.0064, "num_tokens": 898227337.0, "reward": 0.613839328289032, "reward_std": 0.19959653913974762, "rewards/simpleverify_reward/mean": 0.6138392686843872, "rewards/simpleverify_reward/std": 0.48714008927345276, "step": 1394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2370.0, "completions/mean_length": 621.6015625, "completions/mean_terminated_length": 550.3724365234375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.13019967450081374, "grad_norm": 0.13210874795913696, "learning_rate": 2e-07, "loss": 0.0363, "num_tokens": 898872756.0, "reward": 0.6037946939468384, "reward_std": 0.20143984258174896, "rewards/simpleverify_reward/mean": 0.6037946343421936, "rewards/simpleverify_reward/std": 0.48938122391700745, "step": 1395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3319.0, "completions/mean_length": 661.2232666015625, "completions/mean_terminated_length": 602.7423706054688, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.13029300760081433, "grad_norm": 0.10326217114925385, "learning_rate": 2e-07, "loss": 0.0347, "num_tokens": 899554964.0, "reward": 0.582589328289032, "reward_std": 0.15108488500118256, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.4934072494506836, "step": 1396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4014.0, "completions/mean_length": 642.0201416015625, "completions/mean_terminated_length": 599.0892944335938, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.13038634070081492, "grad_norm": 0.133042111992836, "learning_rate": 2e-07, "loss": 0.0289, "num_tokens": 900223358.0, "reward": 0.6149553656578064, "reward_std": 0.19645120203495026, "rewards/simpleverify_reward/mean": 0.6149553656578064, "rewards/simpleverify_reward/std": 0.4868776500225067, "step": 1397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3401.0, "completions/mean_length": 643.075927734375, "completions/mean_terminated_length": 604.1038818359375, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.1304796738008155, "grad_norm": 0.12300930917263031, "learning_rate": 2e-07, "loss": 0.0023, "num_tokens": 900884514.0, "reward": 0.5926339626312256, "reward_std": 0.1703290492296219, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 1398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2548.0, "completions/mean_length": 598.7824096679688, "completions/mean_terminated_length": 555.3141479492188, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.13057300690081608, "grad_norm": 0.1304854303598404, "learning_rate": 2e-07, "loss": 0.0243, "num_tokens": 901514823.0, "reward": 0.5881696939468384, "reward_std": 0.16672906279563904, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924396276473999, "step": 1399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2100.0, "completions/mean_length": 642.65625, "completions/mean_terminated_length": 599.7333374023438, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.13066634000081667, "grad_norm": 0.1211719661951065, "learning_rate": 2e-07, "loss": 0.0135, "num_tokens": 902175011.0, "reward": 0.6082589626312256, "reward_std": 0.1743113398551941, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.4884119927883148, "step": 1400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2303.0, "completions/mean_length": 633.7924194335938, "completions/mean_terminated_length": 578.8367309570312, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.13075967310081724, "grad_norm": 0.12886077165603638, "learning_rate": 2e-07, "loss": 0.0226, "num_tokens": 902837249.0, "reward": 0.613839328289032, "reward_std": 0.18460635840892792, "rewards/simpleverify_reward/mean": 0.6138392686843872, "rewards/simpleverify_reward/std": 0.48714008927345276, "step": 1401 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3705.0, "completions/mean_length": 661.0592041015625, "completions/mean_terminated_length": 594.6268310546875, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.13085300620081783, "grad_norm": 0.12362059205770493, "learning_rate": 2e-07, "loss": 0.0196, "num_tokens": 903511238.0, "reward": 0.515625, "reward_std": 0.18795417249202728, "rewards/simpleverify_reward/mean": 0.515625, "rewards/simpleverify_reward/std": 0.5000349283218384, "step": 1402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3003.0, "completions/mean_length": 587.380615234375, "completions/mean_terminated_length": 563.7269897460938, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.13094633930081842, "grad_norm": 0.1420280635356903, "learning_rate": 2e-07, "loss": 0.0199, "num_tokens": 904129291.0, "reward": 0.613839328289032, "reward_std": 0.20834991335868835, "rewards/simpleverify_reward/mean": 0.6138392686843872, "rewards/simpleverify_reward/std": 0.48714008927345276, "step": 1403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3751.0, "completions/mean_length": 689.1217041015625, "completions/mean_terminated_length": 619.2767944335938, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.13103967240081899, "grad_norm": 0.1302737444639206, "learning_rate": 2e-07, "loss": 0.0513, "num_tokens": 904839464.0, "reward": 0.5189732313156128, "reward_std": 0.19561496376991272, "rewards/simpleverify_reward/mean": 0.5189732313156128, "rewards/simpleverify_reward/std": 0.49991893768310547, "step": 1404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3535.0, "completions/mean_length": 620.1395263671875, "completions/mean_terminated_length": 548.8804321289062, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 0.13113300550081958, "grad_norm": 0.13175493478775024, "learning_rate": 2e-07, "loss": 0.0413, "num_tokens": 905484109.0, "reward": 0.5959821939468384, "reward_std": 0.2011374682188034, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 1405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3589.0, "completions/mean_length": 623.3303833007812, "completions/mean_terminated_length": 576.1900634765625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.13122633860082017, "grad_norm": 0.12277764827013016, "learning_rate": 2e-07, "loss": 0.0305, "num_tokens": 906124741.0, "reward": 0.668526828289032, "reward_std": 0.17179329693317413, "rewards/simpleverify_reward/mean": 0.6685267686843872, "rewards/simpleverify_reward/std": 0.4710056483745575, "step": 1406 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3358.0, "completions/mean_length": 645.984375, "completions/mean_terminated_length": 575.255126953125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.13131967170082076, "grad_norm": 0.12886002659797668, "learning_rate": 2e-07, "loss": 0.0175, "num_tokens": 906794655.0, "reward": 0.5636160969734192, "reward_std": 0.1921658217906952, "rewards/simpleverify_reward/mean": 0.5636160969734192, "rewards/simpleverify_reward/std": 0.49621346592903137, "step": 1407 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3728.0, "completions/mean_length": 622.9777221679688, "completions/mean_terminated_length": 563.8456420898438, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.13141300480082133, "grad_norm": 0.13096526265144348, "learning_rate": 2e-07, "loss": 0.0434, "num_tokens": 907444499.0, "reward": 0.6316964626312256, "reward_std": 0.20121414959430695, "rewards/simpleverify_reward/mean": 0.6316964030265808, "rewards/simpleverify_reward/std": 0.4826137125492096, "step": 1408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3046.0, "completions/mean_length": 530.9921875, "completions/mean_terminated_length": 515.005615234375, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.13150633790082192, "grad_norm": 0.13295531272888184, "learning_rate": 2e-07, "loss": 0.0195, "num_tokens": 908005092.0, "reward": 0.7008928656578064, "reward_std": 0.1738220751285553, "rewards/simpleverify_reward/mean": 0.7008928656578064, "rewards/simpleverify_reward/std": 0.458122581243515, "step": 1409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4057.0, "completions/mean_length": 711.4085083007812, "completions/mean_terminated_length": 602.2280883789062, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.1315996710008225, "grad_norm": 0.12220402806997299, "learning_rate": 2e-07, "loss": 0.0549, "num_tokens": 908726570.0, "reward": 0.5792410969734192, "reward_std": 0.19396665692329407, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49395665526390076, "step": 1410 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3382.0, "completions/mean_length": 646.2042846679688, "completions/mean_terminated_length": 575.4795532226562, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.13169300410082307, "grad_norm": 0.12950937449932098, "learning_rate": 2e-07, "loss": 0.05, "num_tokens": 909388665.0, "reward": 0.6484375, "reward_std": 0.19918283820152283, "rewards/simpleverify_reward/mean": 0.6484375, "rewards/simpleverify_reward/std": 0.4777248501777649, "step": 1411 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2652.0, "completions/mean_length": 593.8873291015625, "completions/mean_terminated_length": 566.3115844726562, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.13178633720082367, "grad_norm": 0.1412707269191742, "learning_rate": 2e-07, "loss": 0.0419, "num_tokens": 910010540.0, "reward": 0.6595982313156128, "reward_std": 0.200084388256073, "rewards/simpleverify_reward/mean": 0.6595982313156128, "rewards/simpleverify_reward/std": 0.4741089344024658, "step": 1412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3524.0, "completions/mean_length": 604.2723388671875, "completions/mean_terminated_length": 572.8153076171875, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.13187967030082426, "grad_norm": 0.12227712571620941, "learning_rate": 2e-07, "loss": 0.0228, "num_tokens": 910630760.0, "reward": 0.6439732313156128, "reward_std": 0.18606878817081451, "rewards/simpleverify_reward/mean": 0.6439732313156128, "rewards/simpleverify_reward/std": 0.47909072041511536, "step": 1413 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3469.0, "completions/mean_length": 600.0335083007812, "completions/mean_terminated_length": 520.2168579101562, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.13197300340082482, "grad_norm": 0.13441546261310577, "learning_rate": 2e-07, "loss": 0.0407, "num_tokens": 911249198.0, "reward": 0.6629464626312256, "reward_std": 0.17679403722286224, "rewards/simpleverify_reward/mean": 0.6629464030265808, "rewards/simpleverify_reward/std": 0.47296738624572754, "step": 1414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3762.0, "completions/mean_length": 662.7265625, "completions/mean_terminated_length": 596.3264770507812, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.13206633650082542, "grad_norm": 0.11893344670534134, "learning_rate": 2e-07, "loss": 0.0251, "num_tokens": 911930297.0, "reward": 0.5524553656578064, "reward_std": 0.1719798743724823, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 1415 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3221.0, "completions/mean_length": 667.6473388671875, "completions/mean_terminated_length": 621.108642578125, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.132159669600826, "grad_norm": 0.12583136558532715, "learning_rate": 2e-07, "loss": 0.0156, "num_tokens": 912614461.0, "reward": 0.6116071939468384, "reward_std": 0.1971551775932312, "rewards/simpleverify_reward/mean": 0.6116071343421936, "rewards/simpleverify_reward/std": 0.48765692114830017, "step": 1416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3777.0, "completions/mean_length": 589.1629638671875, "completions/mean_terminated_length": 549.5823974609375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.13225300270082657, "grad_norm": 0.1292410045862198, "learning_rate": 2e-07, "loss": 0.0359, "num_tokens": 913221903.0, "reward": 0.6495535969734192, "reward_std": 0.18242602050304413, "rewards/simpleverify_reward/mean": 0.6495535969734192, "rewards/simpleverify_reward/std": 0.477376252412796, "step": 1417 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3851.0, "completions/mean_length": 672.8214721679688, "completions/mean_terminated_length": 606.6165771484375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.13234633580082716, "grad_norm": 0.12406592071056366, "learning_rate": 2e-07, "loss": 0.0366, "num_tokens": 913907767.0, "reward": 0.590401828289032, "reward_std": 0.19569163024425507, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.49203425645828247, "step": 1418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3216.0, "completions/mean_length": 621.5078125, "completions/mean_terminated_length": 586.253662109375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.13243966890082776, "grad_norm": 0.1332419067621231, "learning_rate": 2e-07, "loss": 0.0326, "num_tokens": 914555398.0, "reward": 0.6395089626312256, "reward_std": 0.18731659650802612, "rewards/simpleverify_reward/mean": 0.6395089030265808, "rewards/simpleverify_reward/std": 0.4804111421108246, "step": 1419 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3817.0, "completions/mean_length": 702.9955444335938, "completions/mean_terminated_length": 625.5296630859375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.13253300200082832, "grad_norm": 0.11821720004081726, "learning_rate": 2e-07, "loss": 0.0254, "num_tokens": 915278234.0, "reward": 0.5558035969734192, "reward_std": 0.17190389335155487, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715372920036316, "step": 1420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3569.0, "completions/mean_length": 635.7210083007812, "completions/mean_terminated_length": 620.2040405273438, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.1326263351008289, "grad_norm": 0.12074993550777435, "learning_rate": 2e-07, "loss": 0.0244, "num_tokens": 915942512.0, "reward": 0.5814732313156128, "reward_std": 0.18832439184188843, "rewards/simpleverify_reward/mean": 0.5814732313156128, "rewards/simpleverify_reward/std": 0.4935929775238037, "step": 1421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3144.0, "completions/mean_length": 620.7522583007812, "completions/mean_terminated_length": 581.5282592773438, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.1327196682008295, "grad_norm": 0.11828422546386719, "learning_rate": 2e-07, "loss": 0.0279, "num_tokens": 916585162.0, "reward": 0.6071428656578064, "reward_std": 0.16221432387828827, "rewards/simpleverify_reward/mean": 0.6071428656578064, "rewards/simpleverify_reward/std": 0.48865827918052673, "step": 1422 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2327.0, "completions/mean_length": 636.0803833007812, "completions/mean_terminated_length": 585.1415405273438, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 0.13281300130083007, "grad_norm": 0.1309249848127365, "learning_rate": 2e-07, "loss": 0.0314, "num_tokens": 917245418.0, "reward": 0.5848214626312256, "reward_std": 0.18994158506393433, "rewards/simpleverify_reward/mean": 0.5848214030265808, "rewards/simpleverify_reward/std": 0.49302801489830017, "step": 1423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3091.0, "completions/mean_length": 652.9330444335938, "completions/mean_terminated_length": 586.3435668945312, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.13290633440083066, "grad_norm": 0.11701464653015137, "learning_rate": 2e-07, "loss": 0.0251, "num_tokens": 917928142.0, "reward": 0.6149553656578064, "reward_std": 0.15454469621181488, "rewards/simpleverify_reward/mean": 0.6149553656578064, "rewards/simpleverify_reward/std": 0.4868776500225067, "step": 1424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2604.0, "completions/mean_length": 647.9230346679688, "completions/mean_terminated_length": 601.1165161132812, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.13299966750083125, "grad_norm": 0.1186644658446312, "learning_rate": 2e-07, "loss": 0.036, "num_tokens": 918604393.0, "reward": 0.5569196939468384, "reward_std": 0.16795440018177032, "rewards/simpleverify_reward/mean": 0.5569196343421936, "rewards/simpleverify_reward/std": 0.49702703952789307, "step": 1425 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2572.0, "completions/mean_length": 618.0123291015625, "completions/mean_terminated_length": 570.7998046875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.13309300060083182, "grad_norm": 0.14053338766098022, "learning_rate": 2e-07, "loss": 0.0274, "num_tokens": 919241116.0, "reward": 0.6127232313156128, "reward_std": 0.22413496673107147, "rewards/simpleverify_reward/mean": 0.6127232313156128, "rewards/simpleverify_reward/std": 0.4873998463153839, "step": 1426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4005.0, "completions/mean_length": 660.2388916015625, "completions/mean_terminated_length": 605.7029418945312, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 0.1331863337008324, "grad_norm": 0.12159045785665512, "learning_rate": 2e-07, "loss": 0.0205, "num_tokens": 919929010.0, "reward": 0.5837053656578064, "reward_std": 0.17382529377937317, "rewards/simpleverify_reward/mean": 0.5837053656578064, "rewards/simpleverify_reward/std": 0.49321895837783813, "step": 1427 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2133.0, "completions/mean_length": 623.7645263671875, "completions/mean_terminated_length": 580.6068115234375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.133279666800833, "grad_norm": 0.12859614193439484, "learning_rate": 2e-07, "loss": 0.0239, "num_tokens": 920578519.0, "reward": 0.598214328289032, "reward_std": 0.20421679317951202, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053287506103516, "step": 1428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2742.0, "completions/mean_length": 658.1361694335938, "completions/mean_terminated_length": 607.5220947265625, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.1333729999008336, "grad_norm": 0.12027197331190109, "learning_rate": 2e-07, "loss": 0.0208, "num_tokens": 921259489.0, "reward": 0.5323660969734192, "reward_std": 0.1909932643175125, "rewards/simpleverify_reward/mean": 0.5323660969734192, "rewards/simpleverify_reward/std": 0.4992299973964691, "step": 1429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3497.0, "completions/mean_length": 665.739990234375, "completions/mean_terminated_length": 603.37158203125, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.13346633300083416, "grad_norm": 0.11474312841892242, "learning_rate": 2e-07, "loss": 0.0215, "num_tokens": 921941328.0, "reward": 0.6116071939468384, "reward_std": 0.17461484670639038, "rewards/simpleverify_reward/mean": 0.6116071343421936, "rewards/simpleverify_reward/std": 0.4876568913459778, "step": 1430 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4021.0, "completions/mean_length": 605.5279541015625, "completions/mean_terminated_length": 562.1434936523438, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.13355966610083475, "grad_norm": 0.1247917115688324, "learning_rate": 2e-07, "loss": 0.0234, "num_tokens": 922557665.0, "reward": 0.6629464626312256, "reward_std": 0.20365296304225922, "rewards/simpleverify_reward/mean": 0.6629464030265808, "rewards/simpleverify_reward/std": 0.47296738624572754, "step": 1431 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2124.0, "completions/mean_length": 588.0513916015625, "completions/mean_terminated_length": 544.44970703125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.13365299920083534, "grad_norm": 0.12656952440738678, "learning_rate": 2e-07, "loss": 0.0339, "num_tokens": 923169135.0, "reward": 0.6339285969734192, "reward_std": 0.17292305827140808, "rewards/simpleverify_reward/mean": 0.6339285969734192, "rewards/simpleverify_reward/std": 0.48199835419654846, "step": 1432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2689.0, "completions/mean_length": 608.2288208007812, "completions/mean_terminated_length": 548.8456420898438, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.1337463323008359, "grad_norm": 0.11610066890716553, "learning_rate": 2e-07, "loss": 0.0164, "num_tokens": 923805236.0, "reward": 0.5948660969734192, "reward_std": 0.14410065114498138, "rewards/simpleverify_reward/mean": 0.5948660969734192, "rewards/simpleverify_reward/std": 0.49119213223457336, "step": 1433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2453.0, "completions/mean_length": 645.53125, "completions/mean_terminated_length": 594.7315673828125, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.1338396654008365, "grad_norm": 0.11723129451274872, "learning_rate": 2e-07, "loss": 0.0165, "num_tokens": 924476320.0, "reward": 0.5524553656578064, "reward_std": 0.18596114218235016, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 1434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2420.0, "completions/mean_length": 623.171875, "completions/mean_terminated_length": 564.0431518554688, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.1339329985008371, "grad_norm": 0.12509533762931824, "learning_rate": 2e-07, "loss": 0.0219, "num_tokens": 925125218.0, "reward": 0.6049107313156128, "reward_std": 0.18498222529888153, "rewards/simpleverify_reward/mean": 0.6049107313156128, "rewards/simpleverify_reward/std": 0.48914292454719543, "step": 1435 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3768.0, "completions/mean_length": 686.0614013671875, "completions/mean_terminated_length": 624.0625, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.13402633160083766, "grad_norm": 0.10262620449066162, "learning_rate": 2e-07, "loss": 0.0032, "num_tokens": 925830753.0, "reward": 0.53125, "reward_std": 0.16946254670619965, "rewards/simpleverify_reward/mean": 0.53125, "rewards/simpleverify_reward/std": 0.4993011951446533, "step": 1436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3317.0, "completions/mean_length": 715.3404541015625, "completions/mean_terminated_length": 669.4490966796875, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.13411966470083825, "grad_norm": 0.11328788101673126, "learning_rate": 2e-07, "loss": 0.0422, "num_tokens": 926563346.0, "reward": 0.5234375, "reward_std": 0.20297828316688538, "rewards/simpleverify_reward/mean": 0.5234375, "rewards/simpleverify_reward/std": 0.49972933530807495, "step": 1437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3211.0, "completions/mean_length": 675.997802734375, "completions/mean_terminated_length": 629.5724487304688, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.13421299780083884, "grad_norm": 0.10888991504907608, "learning_rate": 2e-07, "loss": 0.0023, "num_tokens": 927274144.0, "reward": 0.5703125, "reward_std": 0.16307169198989868, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 1438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 646.375, "completions/mean_terminated_length": 607.440185546875, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.1343063309008394, "grad_norm": 0.12300126254558563, "learning_rate": 2e-07, "loss": 0.0303, "num_tokens": 927955880.0, "reward": 0.5658482313156128, "reward_std": 0.19557921588420868, "rewards/simpleverify_reward/mean": 0.5658482313156128, "rewards/simpleverify_reward/std": 0.49592188000679016, "step": 1439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2153.0, "completions/mean_length": 634.5011596679688, "completions/mean_terminated_length": 563.5364990234375, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.13439966400084, "grad_norm": 0.11340688914060593, "learning_rate": 2e-07, "loss": 0.0515, "num_tokens": 928606553.0, "reward": 0.6462053656578064, "reward_std": 0.15236549079418182, "rewards/simpleverify_reward/mean": 0.6462053656578064, "rewards/simpleverify_reward/std": 0.478413462638855, "step": 1440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3658.0, "completions/mean_length": 623.1295166015625, "completions/mean_terminated_length": 587.8917236328125, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.1344929971008406, "grad_norm": 0.11792391538619995, "learning_rate": 2e-07, "loss": 0.0201, "num_tokens": 929259693.0, "reward": 0.6205357313156128, "reward_std": 0.1857328861951828, "rewards/simpleverify_reward/mean": 0.6205357313156128, "rewards/simpleverify_reward/std": 0.4855247139930725, "step": 1441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3619.0, "completions/mean_length": 629.6373291015625, "completions/mean_terminated_length": 594.465576171875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.13458633020084115, "grad_norm": 0.12498829513788223, "learning_rate": 2e-07, "loss": 0.0089, "num_tokens": 929912256.0, "reward": 0.6037946939468384, "reward_std": 0.1866351217031479, "rewards/simpleverify_reward/mean": 0.6037946343421936, "rewards/simpleverify_reward/std": 0.48938122391700745, "step": 1442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3549.0, "completions/mean_length": 692.919677734375, "completions/mean_terminated_length": 650.6214599609375, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.13467966330084175, "grad_norm": 0.12241464108228683, "learning_rate": 2e-07, "loss": 0.044, "num_tokens": 930624232.0, "reward": 0.5691964626312256, "reward_std": 0.22868923842906952, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652488231659, "step": 1443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2724.0, "completions/mean_length": 664.0814819335938, "completions/mean_terminated_length": 621.4248657226562, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.13477299640084234, "grad_norm": 0.12403342127799988, "learning_rate": 2e-07, "loss": 0.0293, "num_tokens": 931307681.0, "reward": 0.5703125, "reward_std": 0.19024761021137238, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 1444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3030.0, "completions/mean_length": 656.0826416015625, "completions/mean_terminated_length": 597.5142211914062, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.1348663295008429, "grad_norm": 0.11408257484436035, "learning_rate": 2e-07, "loss": 0.0375, "num_tokens": 931996531.0, "reward": 0.5691964626312256, "reward_std": 0.17968933284282684, "rewards/simpleverify_reward/mean": 0.5691964030265808, "rewards/simpleverify_reward/std": 0.4954652786254883, "step": 1445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3661.0, "completions/mean_length": 636.8504638671875, "completions/mean_terminated_length": 573.956787109375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.1349596626008435, "grad_norm": 0.12599524855613708, "learning_rate": 2e-07, "loss": 0.0323, "num_tokens": 932650397.0, "reward": 0.6183035969734192, "reward_std": 0.1958431601524353, "rewards/simpleverify_reward/mean": 0.6183035969734192, "rewards/simpleverify_reward/std": 0.4860740303993225, "step": 1446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3667.0, "completions/mean_length": 611.9241333007812, "completions/mean_terminated_length": 536.4423828125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.1350529957008441, "grad_norm": 0.12522225081920624, "learning_rate": 2e-07, "loss": 0.0463, "num_tokens": 933277977.0, "reward": 0.645089328289032, "reward_std": 0.1754806488752365, "rewards/simpleverify_reward/mean": 0.6450892686843872, "rewards/simpleverify_reward/std": 0.4787535071372986, "step": 1447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3886.0, "completions/mean_length": 670.9397583007812, "completions/mean_terminated_length": 608.6658935546875, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.13514632880084468, "grad_norm": 0.1246509701013565, "learning_rate": 2e-07, "loss": 0.0534, "num_tokens": 933968939.0, "reward": 0.5848214626312256, "reward_std": 0.20377424359321594, "rewards/simpleverify_reward/mean": 0.5848214030265808, "rewards/simpleverify_reward/std": 0.49302801489830017, "step": 1448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2739.0, "completions/mean_length": 595.4006958007812, "completions/mean_terminated_length": 563.86376953125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.13523966190084524, "grad_norm": 0.14604082703590393, "learning_rate": 2e-07, "loss": 0.0377, "num_tokens": 934585602.0, "reward": 0.6729910969734192, "reward_std": 0.20061752200126648, "rewards/simpleverify_reward/mean": 0.6729910969734192, "rewards/simpleverify_reward/std": 0.46938255429267883, "step": 1449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3956.0, "completions/mean_length": 639.7600708007812, "completions/mean_terminated_length": 592.8427734375, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 0.13533299500084583, "grad_norm": 0.1185305044054985, "learning_rate": 2e-07, "loss": 0.0132, "num_tokens": 935247363.0, "reward": 0.5647321939468384, "reward_std": 0.17423394322395325, "rewards/simpleverify_reward/mean": 0.5647321343421936, "rewards/simpleverify_reward/std": 0.49606895446777344, "step": 1450 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3943.0, "completions/mean_length": 666.0256958007812, "completions/mean_terminated_length": 607.6265869140625, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.13542632810084643, "grad_norm": 0.1289321482181549, "learning_rate": 2e-07, "loss": 0.0256, "num_tokens": 935938698.0, "reward": 0.5569196939468384, "reward_std": 0.19306735694408417, "rewards/simpleverify_reward/mean": 0.5569196343421936, "rewards/simpleverify_reward/std": 0.4970270097255707, "step": 1451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0033482142857143016, "completions/max_length": 4096.0, "completions/max_terminated_length": 3037.0, "completions/mean_length": 576.9788208007812, "completions/mean_terminated_length": 565.1567993164062, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.135519661200847, "grad_norm": 0.1100669726729393, "learning_rate": 2e-07, "loss": 0.0253, "num_tokens": 936541951.0, "reward": 0.652901828289032, "reward_std": 0.13519500195980072, "rewards/simpleverify_reward/mean": 0.6529017686843872, "rewards/simpleverify_reward/std": 0.47631317377090454, "step": 1452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2864.0, "completions/mean_length": 617.0011596679688, "completions/mean_terminated_length": 569.77490234375, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 0.13561299430084758, "grad_norm": 0.13433493673801422, "learning_rate": 2e-07, "loss": 0.0248, "num_tokens": 937201112.0, "reward": 0.5758928656578064, "reward_std": 0.2036515772342682, "rewards/simpleverify_reward/mean": 0.5758928656578064, "rewards/simpleverify_reward/std": 0.49448275566101074, "step": 1453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3950.0, "completions/mean_length": 672.9017944335938, "completions/mean_terminated_length": 618.56689453125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.13570632740084818, "grad_norm": 0.13522236049175262, "learning_rate": 2e-07, "loss": 0.029, "num_tokens": 937895208.0, "reward": 0.559151828289032, "reward_std": 0.21733295917510986, "rewards/simpleverify_reward/mean": 0.5591517686843872, "rewards/simpleverify_reward/std": 0.496766060590744, "step": 1454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3878.0, "completions/mean_length": 673.5335083007812, "completions/mean_terminated_length": 619.2086181640625, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.13579966050084874, "grad_norm": 0.1158934086561203, "learning_rate": 2e-07, "loss": 0.023, "num_tokens": 938582894.0, "reward": 0.5323660969734192, "reward_std": 0.18513627350330353, "rewards/simpleverify_reward/mean": 0.5323660969734192, "rewards/simpleverify_reward/std": 0.4992299973964691, "step": 1455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2217.0, "completions/mean_length": 657.1004638671875, "completions/mean_terminated_length": 602.5147705078125, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.13589299360084933, "grad_norm": 0.12765467166900635, "learning_rate": 2e-07, "loss": 0.0262, "num_tokens": 939256432.0, "reward": 0.6082589626312256, "reward_std": 0.17506061494350433, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.4884119927883148, "step": 1456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3216.0, "completions/mean_length": 674.625, "completions/mean_terminated_length": 620.3174438476562, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.13598632670084992, "grad_norm": 0.11614825576543808, "learning_rate": 2e-07, "loss": 0.0263, "num_tokens": 939960928.0, "reward": 0.5524553656578064, "reward_std": 0.1720554232597351, "rewards/simpleverify_reward/mean": 0.5524553656578064, "rewards/simpleverify_reward/std": 0.49751853942871094, "step": 1457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2628.0, "completions/mean_length": 609.4609375, "completions/mean_terminated_length": 554.1190795898438, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 0.1360796598008505, "grad_norm": 0.13306598365306854, "learning_rate": 2e-07, "loss": 0.0275, "num_tokens": 940599205.0, "reward": 0.5736607313156128, "reward_std": 0.17521215975284576, "rewards/simpleverify_reward/mean": 0.5736607313156128, "rewards/simpleverify_reward/std": 0.4948205351829529, "step": 1458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2667.0, "completions/mean_length": 600.4174194335938, "completions/mean_terminated_length": 568.9256591796875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.13617299290085108, "grad_norm": 0.12402456998825073, "learning_rate": 2e-07, "loss": 0.0079, "num_tokens": 941219771.0, "reward": 0.6283482313156128, "reward_std": 0.18750205636024475, "rewards/simpleverify_reward/mean": 0.6283482313156128, "rewards/simpleverify_reward/std": 0.4835159182548523, "step": 1459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3921.0, "completions/mean_length": 655.0379638671875, "completions/mean_terminated_length": 604.3782348632812, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.13626632600085167, "grad_norm": 0.12295740097761154, "learning_rate": 2e-07, "loss": 0.0208, "num_tokens": 941900125.0, "reward": 0.6350446939468384, "reward_std": 0.1744953840970993, "rewards/simpleverify_reward/mean": 0.6350446343421936, "rewards/simpleverify_reward/std": 0.4816865026950836, "step": 1460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2852.0, "completions/mean_length": 642.5123291015625, "completions/mean_terminated_length": 587.6950073242188, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.13635965910085224, "grad_norm": 0.12390080839395523, "learning_rate": 2e-07, "loss": 0.0433, "num_tokens": 942567624.0, "reward": 0.5580357313156128, "reward_std": 0.17983382940292358, "rewards/simpleverify_reward/mean": 0.5580357313156128, "rewards/simpleverify_reward/std": 0.49689781665802, "step": 1461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3846.0, "completions/mean_length": 641.2600708007812, "completions/mean_terminated_length": 586.4229125976562, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.13645299220085283, "grad_norm": 0.14044207334518433, "learning_rate": 2e-07, "loss": 0.0338, "num_tokens": 943227105.0, "reward": 0.5915178656578064, "reward_std": 0.2065505087375641, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49182769656181335, "step": 1462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2482.0, "completions/mean_length": 643.3928833007812, "completions/mean_terminated_length": 608.3607177734375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.13654632530085342, "grad_norm": 0.13106797635555267, "learning_rate": 2e-07, "loss": 0.0235, "num_tokens": 943888377.0, "reward": 0.6339285969734192, "reward_std": 0.18002675473690033, "rewards/simpleverify_reward/mean": 0.6339285969734192, "rewards/simpleverify_reward/std": 0.48199835419654846, "step": 1463 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3431.0, "completions/mean_length": 657.0703125, "completions/mean_terminated_length": 626.0889892578125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.13663965840085399, "grad_norm": 0.12070310860872269, "learning_rate": 2e-07, "loss": 0.0264, "num_tokens": 944572336.0, "reward": 0.609375, "reward_std": 0.18359604477882385, "rewards/simpleverify_reward/mean": 0.609375, "rewards/simpleverify_reward/std": 0.48816296458244324, "step": 1464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3177.0, "completions/mean_length": 664.765625, "completions/mean_terminated_length": 594.4214477539062, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.13673299150085458, "grad_norm": 0.12044616043567657, "learning_rate": 2e-07, "loss": 0.0132, "num_tokens": 945263878.0, "reward": 0.527901828289032, "reward_std": 0.183966264128685, "rewards/simpleverify_reward/mean": 0.5279017686843872, "rewards/simpleverify_reward/std": 0.49949970841407776, "step": 1465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2519.0, "completions/mean_length": 635.2467041015625, "completions/mean_terminated_length": 592.2316284179688, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.13682632460085517, "grad_norm": 0.1272178292274475, "learning_rate": 2e-07, "loss": 0.0231, "num_tokens": 945917723.0, "reward": 0.5926339626312256, "reward_std": 0.1703290343284607, "rewards/simpleverify_reward/mean": 0.5926339030265808, "rewards/simpleverify_reward/std": 0.49161845445632935, "step": 1466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2900.0, "completions/mean_length": 610.7120971679688, "completions/mean_terminated_length": 571.374755859375, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.13691965770085573, "grad_norm": 0.12701645493507385, "learning_rate": 2e-07, "loss": 0.0198, "num_tokens": 946553065.0, "reward": 0.5770089626312256, "reward_std": 0.20989929139614105, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099617958069, "step": 1467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4024.0, "completions/mean_length": 627.4296875, "completions/mean_terminated_length": 596.1813354492188, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.13701299080085633, "grad_norm": 0.12397134304046631, "learning_rate": 2e-07, "loss": 0.0169, "num_tokens": 947207394.0, "reward": 0.5993303656578064, "reward_std": 0.17295695841312408, "rewards/simpleverify_reward/mean": 0.5993303656578064, "rewards/simpleverify_reward/std": 0.49030786752700806, "step": 1468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2182.0, "completions/mean_length": 660.328125, "completions/mean_terminated_length": 605.7936401367188, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.13710632390085692, "grad_norm": 0.12292729318141937, "learning_rate": 2e-07, "loss": 0.0245, "num_tokens": 947890856.0, "reward": 0.578125, "reward_std": 0.20351210236549377, "rewards/simpleverify_reward/mean": 0.578125, "rewards/simpleverify_reward/std": 0.4941346049308777, "step": 1469 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4019.0, "completions/mean_length": 654.4765625, "completions/mean_terminated_length": 563.8064575195312, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.1371996570008575, "grad_norm": 0.15388168394565582, "learning_rate": 2e-07, "loss": 0.0255, "num_tokens": 948564051.0, "reward": 0.59375, "reward_std": 0.15341997146606445, "rewards/simpleverify_reward/mean": 0.59375, "rewards/simpleverify_reward/std": 0.4914066195487976, "step": 1470 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3919.0, "completions/mean_length": 691.4620971679688, "completions/mean_terminated_length": 609.7531127929688, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.13729299010085808, "grad_norm": 0.13217854499816895, "learning_rate": 2e-07, "loss": 0.0147, "num_tokens": 949273473.0, "reward": 0.5770089626312256, "reward_std": 0.22579464316368103, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099319934845, "step": 1471 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3735.0, "completions/mean_length": 626.421875, "completions/mean_terminated_length": 579.3235473632812, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.13738632320085867, "grad_norm": 0.12586498260498047, "learning_rate": 2e-07, "loss": 0.0354, "num_tokens": 949925427.0, "reward": 0.6026785969734192, "reward_std": 0.17307530343532562, "rewards/simpleverify_reward/mean": 0.6026785969734192, "rewards/simpleverify_reward/std": 0.48961687088012695, "step": 1472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2755.0, "completions/mean_length": 593.8627319335938, "completions/mean_terminated_length": 558.3280639648438, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.13747965630085926, "grad_norm": 0.1224343404173851, "learning_rate": 2e-07, "loss": 0.0251, "num_tokens": 950541872.0, "reward": 0.6339285969734192, "reward_std": 0.1692018061876297, "rewards/simpleverify_reward/mean": 0.6339285969734192, "rewards/simpleverify_reward/std": 0.48199835419654846, "step": 1473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2445.0, "completions/mean_length": 627.4185791015625, "completions/mean_terminated_length": 604.0348510742188, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.13757298940085982, "grad_norm": 0.12322679907083511, "learning_rate": 2e-07, "loss": 0.0164, "num_tokens": 951188367.0, "reward": 0.5792410969734192, "reward_std": 0.21485841274261475, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49395665526390076, "step": 1474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2407.0, "completions/mean_length": 655.864990234375, "completions/mean_terminated_length": 613.106201171875, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.13766632250086042, "grad_norm": 0.12540528178215027, "learning_rate": 2e-07, "loss": 0.0213, "num_tokens": 951876350.0, "reward": 0.5457589626312256, "reward_std": 0.1976444274187088, "rewards/simpleverify_reward/mean": 0.5457589030265808, "rewards/simpleverify_reward/std": 0.4981797933578491, "step": 1475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2021.0, "completions/mean_length": 546.091552734375, "completions/mean_terminated_length": 510.0721435546875, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.137759655600861, "grad_norm": 0.135465607047081, "learning_rate": 2e-07, "loss": 0.0038, "num_tokens": 952445824.0, "reward": 0.6852678656578064, "reward_std": 0.1618356704711914, "rewards/simpleverify_reward/mean": 0.6852678656578064, "rewards/simpleverify_reward/std": 0.46466848254203796, "step": 1476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3242.0, "completions/mean_length": 678.3449096679688, "completions/mean_terminated_length": 631.9513549804688, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.13785298870086157, "grad_norm": 0.11059506237506866, "learning_rate": 2e-07, "loss": 0.0274, "num_tokens": 953142933.0, "reward": 0.613839328289032, "reward_std": 0.1847560852766037, "rewards/simpleverify_reward/mean": 0.6138392686843872, "rewards/simpleverify_reward/std": 0.48714008927345276, "step": 1477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3149.0, "completions/mean_length": 605.9933471679688, "completions/mean_terminated_length": 562.6146850585938, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.13794632180086216, "grad_norm": 0.1175321415066719, "learning_rate": 2e-07, "loss": 0.0113, "num_tokens": 953768175.0, "reward": 0.6941964626312256, "reward_std": 0.1485040932893753, "rewards/simpleverify_reward/mean": 0.6941964030265808, "rewards/simpleverify_reward/std": 0.4610042870044708, "step": 1478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2853.0, "completions/mean_length": 646.4765625, "completions/mean_terminated_length": 583.7579345703125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.13803965490086276, "grad_norm": 0.12534858286380768, "learning_rate": 2e-07, "loss": 0.0138, "num_tokens": 954437386.0, "reward": 0.5714285969734192, "reward_std": 0.1789655089378357, "rewards/simpleverify_reward/mean": 0.5714285969734192, "rewards/simpleverify_reward/std": 0.49514806270599365, "step": 1479 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2052.0, "completions/mean_length": 633.9933471679688, "completions/mean_terminated_length": 579.0408325195312, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.13813298800086332, "grad_norm": 0.1427561491727829, "learning_rate": 2e-07, "loss": 0.0451, "num_tokens": 955097556.0, "reward": 0.5959821939468384, "reward_std": 0.23355914652347565, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 1480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2798.0, "completions/mean_length": 651.1975708007812, "completions/mean_terminated_length": 616.24462890625, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.1382263211008639, "grad_norm": 0.11805449426174164, "learning_rate": 2e-07, "loss": 0.0144, "num_tokens": 955766845.0, "reward": 0.598214328289032, "reward_std": 0.2039981335401535, "rewards/simpleverify_reward/mean": 0.5982142686843872, "rewards/simpleverify_reward/std": 0.49053287506103516, "step": 1481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2779.0, "completions/mean_length": 677.6819458007812, "completions/mean_terminated_length": 627.3555908203125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.1383196542008645, "grad_norm": 0.12287572771310806, "learning_rate": 2e-07, "loss": 0.0232, "num_tokens": 956463520.0, "reward": 0.5368303656578064, "reward_std": 0.19106994569301605, "rewards/simpleverify_reward/mean": 0.5368303656578064, "rewards/simpleverify_reward/std": 0.49892017245292664, "step": 1482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3024.0, "completions/mean_length": 686.8225708007812, "completions/mean_terminated_length": 605.0022583007812, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.13841298730086507, "grad_norm": 0.13571400940418243, "learning_rate": 2e-07, "loss": 0.0357, "num_tokens": 957171185.0, "reward": 0.574776828289032, "reward_std": 0.21139927208423615, "rewards/simpleverify_reward/mean": 0.5747767686843872, "rewards/simpleverify_reward/std": 0.49465295672416687, "step": 1483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3037.0, "completions/mean_length": 624.7221069335938, "completions/mean_terminated_length": 553.5569458007812, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.13850632040086566, "grad_norm": 0.12457072734832764, "learning_rate": 2e-07, "loss": 0.0398, "num_tokens": 957809192.0, "reward": 0.645089328289032, "reward_std": 0.20508511364459991, "rewards/simpleverify_reward/mean": 0.6450892686843872, "rewards/simpleverify_reward/std": 0.4787535071372986, "step": 1484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2616.0, "completions/mean_length": 633.0402221679688, "completions/mean_terminated_length": 601.8423461914062, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.13859965350086625, "grad_norm": 0.11917009204626083, "learning_rate": 2e-07, "loss": 0.0283, "num_tokens": 958476196.0, "reward": 0.4933035969734192, "reward_std": 0.1734868884086609, "rewards/simpleverify_reward/mean": 0.4933035671710968, "rewards/simpleverify_reward/std": 0.5002344250679016, "step": 1485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3658.0, "completions/mean_length": 600.3984375, "completions/mean_terminated_length": 556.9503173828125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.13869298660086682, "grad_norm": 0.1480293571949005, "learning_rate": 2e-07, "loss": 0.0374, "num_tokens": 959097601.0, "reward": 0.6272321939468384, "reward_std": 0.2123001217842102, "rewards/simpleverify_reward/mean": 0.6272321343421936, "rewards/simpleverify_reward/std": 0.4838111698627472, "step": 1486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3292.0, "completions/mean_length": 623.232177734375, "completions/mean_terminated_length": 564.1044311523438, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.1387863197008674, "grad_norm": 0.1172926276922226, "learning_rate": 2e-07, "loss": 0.0314, "num_tokens": 959737377.0, "reward": 0.625, "reward_std": 0.15526148676872253, "rewards/simpleverify_reward/mean": 0.625, "rewards/simpleverify_reward/std": 0.48439329862594604, "step": 1487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2269.0, "completions/mean_length": 605.765625, "completions/mean_terminated_length": 570.3517456054688, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.138879652800868, "grad_norm": 0.1259901076555252, "learning_rate": 2e-07, "loss": 0.0157, "num_tokens": 960374751.0, "reward": 0.6361607313156128, "reward_std": 0.17957280576229095, "rewards/simpleverify_reward/mean": 0.6361607313156128, "rewards/simpleverify_reward/std": 0.4813718795776367, "step": 1488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3609.0, "completions/mean_length": 697.904052734375, "completions/mean_terminated_length": 628.2391967773438, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.13897298590086857, "grad_norm": 0.11874297261238098, "learning_rate": 2e-07, "loss": 0.0318, "num_tokens": 961099369.0, "reward": 0.5446428656578064, "reward_std": 0.19993282854557037, "rewards/simpleverify_reward/mean": 0.5446428656578064, "rewards/simpleverify_reward/std": 0.49828118085861206, "step": 1489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4052.0, "completions/mean_length": 603.9107666015625, "completions/mean_terminated_length": 564.4966430664062, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.13906631900086916, "grad_norm": 0.12622807919979095, "learning_rate": 2e-07, "loss": 0.0189, "num_tokens": 961727585.0, "reward": 0.6462053656578064, "reward_std": 0.17017818987369537, "rewards/simpleverify_reward/mean": 0.6462053656578064, "rewards/simpleverify_reward/std": 0.478413462638855, "step": 1490 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3611.0, "completions/mean_length": 584.2567138671875, "completions/mean_terminated_length": 548.6245727539062, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.13915965210086975, "grad_norm": 0.13595692813396454, "learning_rate": 2e-07, "loss": 0.021, "num_tokens": 962342295.0, "reward": 0.6116071939468384, "reward_std": 0.16529622673988342, "rewards/simpleverify_reward/mean": 0.6116071343421936, "rewards/simpleverify_reward/std": 0.4876568913459778, "step": 1491 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3979.0, "completions/mean_length": 691.8795166015625, "completions/mean_terminated_length": 637.8458251953125, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.13925298520087034, "grad_norm": 0.10924011468887329, "learning_rate": 2e-07, "loss": 0.0079, "num_tokens": 963056627.0, "reward": 0.5390625, "reward_std": 0.16641205549240112, "rewards/simpleverify_reward/mean": 0.5390625, "rewards/simpleverify_reward/std": 0.4987502098083496, "step": 1492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3996.0, "completions/mean_length": 619.375, "completions/mean_terminated_length": 556.1636352539062, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.1393463183008709, "grad_norm": 0.1138574555516243, "learning_rate": 2e-07, "loss": -0.0033, "num_tokens": 963702635.0, "reward": 0.5725446939468384, "reward_std": 0.1986968219280243, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 1493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3761.0, "completions/mean_length": 634.2076416015625, "completions/mean_terminated_length": 579.2584838867188, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.1394396514008715, "grad_norm": 0.10239175707101822, "learning_rate": 2e-07, "loss": 0.02, "num_tokens": 964355621.0, "reward": 0.6227678656578064, "reward_std": 0.14736363291740417, "rewards/simpleverify_reward/mean": 0.6227678656578064, "rewards/simpleverify_reward/std": 0.4849644899368286, "step": 1494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2546.0, "completions/mean_length": 611.6060791015625, "completions/mean_terminated_length": 556.2982177734375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.1395329845008721, "grad_norm": 0.14016938209533691, "learning_rate": 2e-07, "loss": 0.0101, "num_tokens": 964995036.0, "reward": 0.5892857313156128, "reward_std": 0.19422808289527893, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 1495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2573.0, "completions/mean_length": 550.0123291015625, "completions/mean_terminated_length": 526.1067504882812, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.13962631760087266, "grad_norm": 0.13994978368282318, "learning_rate": 2e-07, "loss": 0.0246, "num_tokens": 965581399.0, "reward": 0.6127232313156128, "reward_std": 0.16586072742938995, "rewards/simpleverify_reward/mean": 0.6127232313156128, "rewards/simpleverify_reward/std": 0.4873998463153839, "step": 1496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3949.0, "completions/mean_length": 648.3538208007812, "completions/mean_terminated_length": 585.6693115234375, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.13971965070087325, "grad_norm": 0.12434790283441544, "learning_rate": 2e-07, "loss": 0.0206, "num_tokens": 966252700.0, "reward": 0.5993303656578064, "reward_std": 0.19738228619098663, "rewards/simpleverify_reward/mean": 0.5993303656578064, "rewards/simpleverify_reward/std": 0.49030786752700806, "step": 1497 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3457.0, "completions/mean_length": 620.638427734375, "completions/mean_terminated_length": 573.4615478515625, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 0.13981298380087384, "grad_norm": 0.11606879532337189, "learning_rate": 2e-07, "loss": 0.0102, "num_tokens": 966903352.0, "reward": 0.6082589626312256, "reward_std": 0.17288914322853088, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.48841196298599243, "step": 1498 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2652.0, "completions/mean_length": 634.8605346679688, "completions/mean_terminated_length": 599.7418212890625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.1399063169008744, "grad_norm": 0.13142597675323486, "learning_rate": 2e-07, "loss": 0.0243, "num_tokens": 967561179.0, "reward": 0.6082589626312256, "reward_std": 0.20926101505756378, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.48841196298599243, "step": 1499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2278.0, "completions/mean_length": 651.2701416015625, "completions/mean_terminated_length": 624.146240234375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.139999650000875, "grad_norm": 0.10551148653030396, "learning_rate": 2e-07, "loss": 0.0249, "num_tokens": 968246109.0, "reward": 0.551339328289032, "reward_std": 0.16776825487613678, "rewards/simpleverify_reward/mean": 0.5513392686843872, "rewards/simpleverify_reward/std": 0.4976350665092468, "step": 1500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 610.3471069335938, "completions/mean_terminated_length": 590.7868041992188, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.1400929831008756, "grad_norm": 0.12844125926494598, "learning_rate": 2e-07, "loss": 0.0319, "num_tokens": 968881276.0, "reward": 0.5770089626312256, "reward_std": 0.20388232171535492, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099319934845, "step": 1501 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3986.0, "completions/mean_length": 611.7957763671875, "completions/mean_terminated_length": 572.4706420898438, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.14018631620087615, "grad_norm": 0.12619726359844208, "learning_rate": 2e-07, "loss": 0.0234, "num_tokens": 969513245.0, "reward": 0.6205357313156128, "reward_std": 0.16789092123508453, "rewards/simpleverify_reward/mean": 0.6205357313156128, "rewards/simpleverify_reward/std": 0.4855247139930725, "step": 1502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3698.0, "completions/mean_length": 655.833740234375, "completions/mean_terminated_length": 601.2279052734375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.14027964930087675, "grad_norm": 0.1074385866522789, "learning_rate": 2e-07, "loss": 0.0134, "num_tokens": 970181296.0, "reward": 0.5959821939468384, "reward_std": 0.15766821801662445, "rewards/simpleverify_reward/mean": 0.5959821343421936, "rewards/simpleverify_reward/std": 0.490975022315979, "step": 1503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3280.0, "completions/mean_length": 664.265625, "completions/mean_terminated_length": 609.7936401367188, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.14037298240087734, "grad_norm": 0.12055550515651703, "learning_rate": 2e-07, "loss": 0.0204, "num_tokens": 970861294.0, "reward": 0.645089328289032, "reward_std": 0.1734868884086609, "rewards/simpleverify_reward/mean": 0.6450892686843872, "rewards/simpleverify_reward/std": 0.4787535071372986, "step": 1504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4032.0, "completions/mean_length": 683.4152221679688, "completions/mean_terminated_length": 617.4152221679688, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.1404663155008779, "grad_norm": 0.10533326864242554, "learning_rate": 2e-07, "loss": 0.0183, "num_tokens": 971554234.0, "reward": 0.6104910969734192, "reward_std": 0.1496991515159607, "rewards/simpleverify_reward/mean": 0.6104910969734192, "rewards/simpleverify_reward/std": 0.48791128396987915, "step": 1505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 653.1920166015625, "completions/mean_terminated_length": 629.9820556640625, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.1405596486008785, "grad_norm": 0.13841281831264496, "learning_rate": 2e-07, "loss": 0.0344, "num_tokens": 972223670.0, "reward": 0.5703125, "reward_std": 0.22286252677440643, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 1506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3852.0, "completions/mean_length": 643.3114013671875, "completions/mean_terminated_length": 592.47900390625, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 0.1406529817008791, "grad_norm": 0.1317114531993866, "learning_rate": 2e-07, "loss": 0.0588, "num_tokens": 972885949.0, "reward": 0.6082589626312256, "reward_std": 0.21872051060199738, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.48841196298599243, "step": 1507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3807.0, "completions/mean_length": 599.2689819335938, "completions/mean_terminated_length": 555.8067626953125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.14074631480087965, "grad_norm": 0.14942580461502075, "learning_rate": 2e-07, "loss": 0.0389, "num_tokens": 973508870.0, "reward": 0.6261160969734192, "reward_std": 0.17554666101932526, "rewards/simpleverify_reward/mean": 0.6261160969734192, "rewards/simpleverify_reward/std": 0.48410359025001526, "step": 1508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2048.0, "completions/mean_length": 552.8761596679688, "completions/mean_terminated_length": 516.925537109375, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.14083964790088024, "grad_norm": 0.13932278752326965, "learning_rate": 2e-07, "loss": 0.0281, "num_tokens": 974094943.0, "reward": 0.6328125, "reward_std": 0.195388525724411, "rewards/simpleverify_reward/mean": 0.6328125, "rewards/simpleverify_reward/std": 0.48230743408203125, "step": 1509 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3490.0, "completions/mean_length": 663.8147583007812, "completions/mean_terminated_length": 581.4422607421875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.14093298100088084, "grad_norm": 0.12546497583389282, "learning_rate": 2e-07, "loss": 0.0568, "num_tokens": 974768513.0, "reward": 0.65625, "reward_std": 0.18096037209033966, "rewards/simpleverify_reward/mean": 0.65625, "rewards/simpleverify_reward/std": 0.4752241373062134, "step": 1510 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4089.0, "completions/mean_length": 617.7410888671875, "completions/mean_terminated_length": 586.4053955078125, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.14102631410088143, "grad_norm": 0.12443217635154724, "learning_rate": 2e-07, "loss": 0.027, "num_tokens": 975411761.0, "reward": 0.6205357313156128, "reward_std": 0.17543858289718628, "rewards/simpleverify_reward/mean": 0.6205357313156128, "rewards/simpleverify_reward/std": 0.4855247139930725, "step": 1511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3674.0, "completions/mean_length": 659.8560791015625, "completions/mean_terminated_length": 597.3806762695312, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.141119647200882, "grad_norm": 0.12551942467689514, "learning_rate": 2e-07, "loss": 0.0305, "num_tokens": 976094616.0, "reward": 0.6104910969734192, "reward_std": 0.17367054522037506, "rewards/simpleverify_reward/mean": 0.6104910969734192, "rewards/simpleverify_reward/std": 0.48791128396987915, "step": 1512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2377.0, "completions/mean_length": 588.6473388671875, "completions/mean_terminated_length": 557.049560546875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.14121298030088258, "grad_norm": 0.12920527160167694, "learning_rate": 2e-07, "loss": 0.0356, "num_tokens": 976702620.0, "reward": 0.6339285969734192, "reward_std": 0.17852042615413666, "rewards/simpleverify_reward/mean": 0.6339285969734192, "rewards/simpleverify_reward/std": 0.48199835419654846, "step": 1513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3407.0, "completions/mean_length": 666.4732666015625, "completions/mean_terminated_length": 592.1732788085938, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.14130631340088318, "grad_norm": 0.12319471687078476, "learning_rate": 2e-07, "loss": -0.0014, "num_tokens": 977386444.0, "reward": 0.5613839626312256, "reward_std": 0.18144823610782623, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 1514 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2978.0, "completions/mean_length": 660.5022583007812, "completions/mean_terminated_length": 594.0591430664062, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.14139964650088374, "grad_norm": 0.12529587745666504, "learning_rate": 2e-07, "loss": 0.0543, "num_tokens": 978063262.0, "reward": 0.566964328289032, "reward_std": 0.2150036096572876, "rewards/simpleverify_reward/mean": 0.5669642686843872, "rewards/simpleverify_reward/std": 0.49577224254608154, "step": 1515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3579.0, "completions/mean_length": 623.8850708007812, "completions/mean_terminated_length": 580.7288208007812, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.14149297960088433, "grad_norm": 0.12564031779766083, "learning_rate": 2e-07, "loss": 0.0347, "num_tokens": 978710567.0, "reward": 0.5792410969734192, "reward_std": 0.18535634875297546, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49395665526390076, "step": 1516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3064.0, "completions/mean_length": 644.607177734375, "completions/mean_terminated_length": 573.8496704101562, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.14158631270088493, "grad_norm": 0.10279744863510132, "learning_rate": 2e-07, "loss": 0.0374, "num_tokens": 979372919.0, "reward": 0.645089328289032, "reward_std": 0.13542324304580688, "rewards/simpleverify_reward/mean": 0.6450892686843872, "rewards/simpleverify_reward/std": 0.4787535071372986, "step": 1517 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2837.0, "completions/mean_length": 637.0592041015625, "completions/mean_terminated_length": 594.066650390625, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.1416796458008855, "grad_norm": 0.10790126770734787, "learning_rate": 2e-07, "loss": 0.0132, "num_tokens": 980038588.0, "reward": 0.606026828289032, "reward_std": 0.12489927560091019, "rewards/simpleverify_reward/mean": 0.6060267686843872, "rewards/simpleverify_reward/std": 0.48890194296836853, "step": 1518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3532.0, "completions/mean_length": 664.4085083007812, "completions/mean_terminated_length": 609.9387817382812, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.14177297890088608, "grad_norm": 0.11150240898132324, "learning_rate": 2e-07, "loss": 0.024, "num_tokens": 980714370.0, "reward": 0.6662946939468384, "reward_std": 0.14436137676239014, "rewards/simpleverify_reward/mean": 0.6662946343421936, "rewards/simpleverify_reward/std": 0.47179922461509705, "step": 1519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3704.0, "completions/mean_length": 618.6261596679688, "completions/mean_terminated_length": 547.3359985351562, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.14186631200088667, "grad_norm": 0.13485361635684967, "learning_rate": 2e-07, "loss": 0.0253, "num_tokens": 981347067.0, "reward": 0.6361607313156128, "reward_std": 0.20576800405979156, "rewards/simpleverify_reward/mean": 0.6361607313156128, "rewards/simpleverify_reward/std": 0.4813718795776367, "step": 1520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3383.0, "completions/mean_length": 666.9308471679688, "completions/mean_terminated_length": 624.3096313476562, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.14195964510088724, "grad_norm": 0.12531337141990662, "learning_rate": 2e-07, "loss": 0.0213, "num_tokens": 982047301.0, "reward": 0.543526828289032, "reward_std": 0.20955385267734528, "rewards/simpleverify_reward/mean": 0.5435267686843872, "rewards/simpleverify_reward/std": 0.49838000535964966, "step": 1521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2326.0, "completions/mean_length": 623.40625, "completions/mean_terminated_length": 584.2122192382812, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.14205297820088783, "grad_norm": 0.12550699710845947, "learning_rate": 2e-07, "loss": 0.0142, "num_tokens": 982698105.0, "reward": 0.6283482313156128, "reward_std": 0.20068030059337616, "rewards/simpleverify_reward/mean": 0.6283482313156128, "rewards/simpleverify_reward/std": 0.4835159480571747, "step": 1522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2573.0, "completions/mean_length": 695.0736694335938, "completions/mean_terminated_length": 633.2385864257812, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.14214631130088842, "grad_norm": 0.11454468965530396, "learning_rate": 2e-07, "loss": 0.0245, "num_tokens": 983412131.0, "reward": 0.5870535969734192, "reward_std": 0.19445309042930603, "rewards/simpleverify_reward/mean": 0.5870535969734192, "rewards/simpleverify_reward/std": 0.49263837933540344, "step": 1523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2009.0, "completions/mean_length": 616.8705444335938, "completions/mean_terminated_length": 549.5836181640625, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.142239644400889, "grad_norm": 0.1339309811592102, "learning_rate": 2e-07, "loss": 0.0478, "num_tokens": 984053463.0, "reward": 0.6361607313156128, "reward_std": 0.19888044893741608, "rewards/simpleverify_reward/mean": 0.6361607313156128, "rewards/simpleverify_reward/std": 0.4813718795776367, "step": 1524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3803.0, "completions/mean_length": 719.630615234375, "completions/mean_terminated_length": 610.7154541015625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.14233297750088958, "grad_norm": 0.12977775931358337, "learning_rate": 2e-07, "loss": 0.0539, "num_tokens": 984787068.0, "reward": 0.5145089626312256, "reward_std": 0.22548791766166687, "rewards/simpleverify_reward/mean": 0.5145089030265808, "rewards/simpleverify_reward/std": 0.5000685453414917, "step": 1525 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2805.0, "completions/mean_length": 700.9486694335938, "completions/mean_terminated_length": 611.5028686523438, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.14242631060089017, "grad_norm": 0.11973059922456741, "learning_rate": 2e-07, "loss": 0.0272, "num_tokens": 985511334.0, "reward": 0.5145089626312256, "reward_std": 0.18442019820213318, "rewards/simpleverify_reward/mean": 0.5145089030265808, "rewards/simpleverify_reward/std": 0.5000685453414917, "step": 1526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 1864.0, "completions/mean_length": 599.8527221679688, "completions/mean_terminated_length": 576.2831420898438, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.14251964370089074, "grad_norm": 0.12230966985225677, "learning_rate": 2e-07, "loss": 0.0129, "num_tokens": 986136546.0, "reward": 0.6551339626312256, "reward_std": 0.1698751151561737, "rewards/simpleverify_reward/mean": 0.6551339030265808, "rewards/simpleverify_reward/std": 0.4755900502204895, "step": 1527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3738.0, "completions/mean_length": 615.3973388671875, "completions/mean_terminated_length": 576.1128540039062, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.14261297680089133, "grad_norm": 0.12341618537902832, "learning_rate": 2e-07, "loss": 0.0194, "num_tokens": 986786350.0, "reward": 0.606026828289032, "reward_std": 0.1836288422346115, "rewards/simpleverify_reward/mean": 0.6060267686843872, "rewards/simpleverify_reward/std": 0.48890194296836853, "step": 1528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0033482142857143016, "completions/max_length": 4096.0, "completions/max_terminated_length": 2214.0, "completions/mean_length": 562.3136596679688, "completions/mean_terminated_length": 550.4423217773438, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.14270630990089192, "grad_norm": 0.13733181357383728, "learning_rate": 2e-07, "loss": 0.0211, "num_tokens": 987381935.0, "reward": 0.6305803656578064, "reward_std": 0.17337629199028015, "rewards/simpleverify_reward/mean": 0.6305803656578064, "rewards/simpleverify_reward/std": 0.4829172194004059, "step": 1529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3361.0, "completions/mean_length": 605.4029541015625, "completions/mean_terminated_length": 558.0192260742188, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.14279964300089248, "grad_norm": 0.1359368860721588, "learning_rate": 2e-07, "loss": 0.0004, "num_tokens": 988002416.0, "reward": 0.6551339626312256, "reward_std": 0.19584247469902039, "rewards/simpleverify_reward/mean": 0.6551339030265808, "rewards/simpleverify_reward/std": 0.4755900502204895, "step": 1530 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 622.9933471679688, "completions/mean_terminated_length": 591.7049560546875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.14289297610089308, "grad_norm": 0.13424073159694672, "learning_rate": 2e-07, "loss": 0.0289, "num_tokens": 988648874.0, "reward": 0.527901828289032, "reward_std": 0.1861465871334076, "rewards/simpleverify_reward/mean": 0.5279017686843872, "rewards/simpleverify_reward/std": 0.49949970841407776, "step": 1531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2301.0, "completions/mean_length": 592.427490234375, "completions/mean_terminated_length": 540.845947265625, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.14298630920089367, "grad_norm": 0.12171164900064468, "learning_rate": 2e-07, "loss": 0.0264, "num_tokens": 989260609.0, "reward": 0.6462053656578064, "reward_std": 0.15217892825603485, "rewards/simpleverify_reward/mean": 0.6462053656578064, "rewards/simpleverify_reward/std": 0.478413462638855, "step": 1532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3326.0, "completions/mean_length": 587.0022583007812, "completions/mean_terminated_length": 555.3896484375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.14307964230089426, "grad_norm": 0.14071212708950043, "learning_rate": 2e-07, "loss": 0.0185, "num_tokens": 989870611.0, "reward": 0.6227678656578064, "reward_std": 0.2304784059524536, "rewards/simpleverify_reward/mean": 0.6227678656578064, "rewards/simpleverify_reward/std": 0.4849644601345062, "step": 1533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3020.0, "completions/mean_length": 644.3917846679688, "completions/mean_terminated_length": 617.2137451171875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.14317297540089483, "grad_norm": 0.13911019265651703, "learning_rate": 2e-07, "loss": 0.0206, "num_tokens": 990535186.0, "reward": 0.6037946939468384, "reward_std": 0.2318345606327057, "rewards/simpleverify_reward/mean": 0.6037946343421936, "rewards/simpleverify_reward/std": 0.48938122391700745, "step": 1534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2725.0, "completions/mean_length": 641.7098388671875, "completions/mean_terminated_length": 606.66064453125, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.14326630850089542, "grad_norm": 0.13812510669231415, "learning_rate": 2e-07, "loss": 0.0235, "num_tokens": 991197382.0, "reward": 0.5770089626312256, "reward_std": 0.2150745540857315, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099319934845, "step": 1535 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2762.0, "completions/mean_length": 578.2835083007812, "completions/mean_terminated_length": 550.5849609375, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.143359641600896, "grad_norm": 0.13185852766036987, "learning_rate": 2e-07, "loss": 0.0361, "num_tokens": 991807604.0, "reward": 0.6540178656578064, "reward_std": 0.19039665162563324, "rewards/simpleverify_reward/mean": 0.6540178656578064, "rewards/simpleverify_reward/std": 0.4759531021118164, "step": 1536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3620.0, "completions/mean_length": 666.4475708007812, "completions/mean_terminated_length": 600.1194458007812, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.14345297470089657, "grad_norm": 0.11741551011800766, "learning_rate": 2e-07, "loss": 0.0219, "num_tokens": 992480709.0, "reward": 0.629464328289032, "reward_std": 0.16435371339321136, "rewards/simpleverify_reward/mean": 0.6294642686843872, "rewards/simpleverify_reward/std": 0.4832179844379425, "step": 1537 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2964.0, "completions/mean_length": 661.8392944335938, "completions/mean_terminated_length": 599.3999633789062, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.14354630780089717, "grad_norm": 0.13559594750404358, "learning_rate": 2e-07, "loss": 0.051, "num_tokens": 993161917.0, "reward": 0.6116071939468384, "reward_std": 0.21297159790992737, "rewards/simpleverify_reward/mean": 0.6116071343421936, "rewards/simpleverify_reward/std": 0.4876568913459778, "step": 1538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3405.0, "completions/mean_length": 667.0770263671875, "completions/mean_terminated_length": 592.7901611328125, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.14363964090089776, "grad_norm": 0.12091411650180817, "learning_rate": 2e-07, "loss": 0.0354, "num_tokens": 993851978.0, "reward": 0.6160714626312256, "reward_std": 0.17682726681232452, "rewards/simpleverify_reward/mean": 0.6160714030265808, "rewards/simpleverify_reward/std": 0.486612468957901, "step": 1539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2575.0, "completions/mean_length": 632.654052734375, "completions/mean_terminated_length": 581.664794921875, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.14373297400089832, "grad_norm": 0.12669385969638824, "learning_rate": 2e-07, "loss": 0.0517, "num_tokens": 994501580.0, "reward": 0.6227678656578064, "reward_std": 0.19986501336097717, "rewards/simpleverify_reward/mean": 0.6227678656578064, "rewards/simpleverify_reward/std": 0.4849644899368286, "step": 1540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2876.0, "completions/mean_length": 573.732177734375, "completions/mean_terminated_length": 545.997802734375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.14382630710089891, "grad_norm": 0.12352126091718674, "learning_rate": 2e-07, "loss": 0.0116, "num_tokens": 995104532.0, "reward": 0.6796875596046448, "reward_std": 0.16262802481651306, "rewards/simpleverify_reward/mean": 0.6796875, "rewards/simpleverify_reward/std": 0.4668572247028351, "step": 1541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 695.8482666015625, "completions/mean_terminated_length": 614.2445678710938, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.1439196402008995, "grad_norm": 0.1303410828113556, "learning_rate": 2e-07, "loss": 0.0384, "num_tokens": 995812828.0, "reward": 0.5837053656578064, "reward_std": 0.22097612917423248, "rewards/simpleverify_reward/mean": 0.5837053656578064, "rewards/simpleverify_reward/std": 0.49321892857551575, "step": 1542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2806.0, "completions/mean_length": 636.515625, "completions/mean_terminated_length": 597.4695434570312, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 0.14401297330090007, "grad_norm": 0.13609854876995087, "learning_rate": 2e-07, "loss": 0.0212, "num_tokens": 996475442.0, "reward": 0.5803571939468384, "reward_std": 0.18919342756271362, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761425971985, "step": 1543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3591.0, "completions/mean_length": 670.0670166015625, "completions/mean_terminated_length": 615.6870727539062, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.14410630640090066, "grad_norm": 0.13248211145401, "learning_rate": 2e-07, "loss": 0.0269, "num_tokens": 997165022.0, "reward": 0.5848214626312256, "reward_std": 0.21714681386947632, "rewards/simpleverify_reward/mean": 0.5848214030265808, "rewards/simpleverify_reward/std": 0.49302801489830017, "step": 1544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3379.0, "completions/mean_length": 683.0435791015625, "completions/mean_terminated_length": 632.796142578125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.14419963950090126, "grad_norm": 0.12538331747055054, "learning_rate": 2e-07, "loss": -0.0004, "num_tokens": 997866813.0, "reward": 0.5323660969734192, "reward_std": 0.22064125537872314, "rewards/simpleverify_reward/mean": 0.5323660969734192, "rewards/simpleverify_reward/std": 0.4992299973964691, "step": 1545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2562.0, "completions/mean_length": 612.3314819335938, "completions/mean_terminated_length": 569.0316772460938, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.14429297260090182, "grad_norm": 0.12735846638679504, "learning_rate": 2e-07, "loss": 0.0219, "num_tokens": 998501902.0, "reward": 0.6495535969734192, "reward_std": 0.16101224720478058, "rewards/simpleverify_reward/mean": 0.6495535969734192, "rewards/simpleverify_reward/std": 0.477376252412796, "step": 1546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3186.0, "completions/mean_length": 656.796875, "completions/mean_terminated_length": 617.9796752929688, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.1443863057009024, "grad_norm": 0.11964999884366989, "learning_rate": 2e-07, "loss": 0.0276, "num_tokens": 999179736.0, "reward": 0.59375, "reward_std": 0.17228226363658905, "rewards/simpleverify_reward/mean": 0.59375, "rewards/simpleverify_reward/std": 0.4914066195487976, "step": 1547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2905.0, "completions/mean_length": 674.216552734375, "completions/mean_terminated_length": 608.0386352539062, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.144479638800903, "grad_norm": 0.11779432743787766, "learning_rate": 2e-07, "loss": 0.0132, "num_tokens": 999874778.0, "reward": 0.5212053656578064, "reward_std": 0.16766127943992615, "rewards/simpleverify_reward/mean": 0.5212053656578064, "rewards/simpleverify_reward/std": 0.49982914328575134, "step": 1548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3605.0, "completions/mean_length": 620.8158569335938, "completions/mean_terminated_length": 561.6470336914062, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.14457297190090357, "grad_norm": 0.12761478126049042, "learning_rate": 2e-07, "loss": 0.0393, "num_tokens": 1000528349.0, "reward": 0.613839328289032, "reward_std": 0.1885194182395935, "rewards/simpleverify_reward/mean": 0.6138392686843872, "rewards/simpleverify_reward/std": 0.48714008927345276, "step": 1549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2879.0, "completions/mean_length": 648.6317138671875, "completions/mean_terminated_length": 581.9590454101562, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.14466630500090416, "grad_norm": 0.12239287793636322, "learning_rate": 2e-07, "loss": 0.0251, "num_tokens": 1001200515.0, "reward": 0.5892857313156128, "reward_std": 0.1853531152009964, "rewards/simpleverify_reward/mean": 0.5892857313156128, "rewards/simpleverify_reward/std": 0.49223825335502625, "step": 1550 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3544.0, "completions/mean_length": 593.8973388671875, "completions/mean_terminated_length": 562.3468627929688, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.14475963810090475, "grad_norm": 0.13429835438728333, "learning_rate": 2e-07, "loss": 0.012, "num_tokens": 1001820839.0, "reward": 0.6037946939468384, "reward_std": 0.18178732693195343, "rewards/simpleverify_reward/mean": 0.6037946343421936, "rewards/simpleverify_reward/std": 0.48938122391700745, "step": 1551 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2650.0, "completions/mean_length": 661.2980346679688, "completions/mean_terminated_length": 614.673095703125, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.14485297120090532, "grad_norm": 0.11266981065273285, "learning_rate": 2e-07, "loss": 0.0306, "num_tokens": 1002507826.0, "reward": 0.5602678656578064, "reward_std": 0.1393709033727646, "rewards/simpleverify_reward/mean": 0.5602678656578064, "rewards/simpleverify_reward/std": 0.4966317117214203, "step": 1552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2307.0, "completions/mean_length": 605.8951416015625, "completions/mean_terminated_length": 558.5181274414062, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.1449463043009059, "grad_norm": 0.13266074657440186, "learning_rate": 2e-07, "loss": 0.0261, "num_tokens": 1003130852.0, "reward": 0.6573660969734192, "reward_std": 0.19148436188697815, "rewards/simpleverify_reward/mean": 0.6573660969734192, "rewards/simpleverify_reward/std": 0.47485536336898804, "step": 1553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 593.2388916015625, "completions/mean_terminated_length": 557.6978149414062, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.1450396374009065, "grad_norm": 0.12145979702472687, "learning_rate": 2e-07, "loss": 0.029, "num_tokens": 1003753994.0, "reward": 0.5792410969734192, "reward_std": 0.1730746179819107, "rewards/simpleverify_reward/mean": 0.5792410969734192, "rewards/simpleverify_reward/std": 0.49395665526390076, "step": 1554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3000.0, "completions/mean_length": 590.2723388671875, "completions/mean_terminated_length": 566.63818359375, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.1451329705009071, "grad_norm": 0.1419229805469513, "learning_rate": 2e-07, "loss": 0.0219, "num_tokens": 1004369694.0, "reward": 0.6272321939468384, "reward_std": 0.19933803379535675, "rewards/simpleverify_reward/mean": 0.6272321343421936, "rewards/simpleverify_reward/std": 0.4838111698627472, "step": 1555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3696.0, "completions/mean_length": 696.544677734375, "completions/mean_terminated_length": 618.9314575195312, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.14522630360090766, "grad_norm": 0.12798136472702026, "learning_rate": 2e-07, "loss": 0.0434, "num_tokens": 1005085150.0, "reward": 0.5993303656578064, "reward_std": 0.18303261697292328, "rewards/simpleverify_reward/mean": 0.5993303656578064, "rewards/simpleverify_reward/std": 0.49030786752700806, "step": 1556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2559.0, "completions/mean_length": 665.896240234375, "completions/mean_terminated_length": 623.2621459960938, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.14531963670090825, "grad_norm": 0.12191472947597504, "learning_rate": 2e-07, "loss": 0.013, "num_tokens": 1005765865.0, "reward": 0.613839328289032, "reward_std": 0.17675308883190155, "rewards/simpleverify_reward/mean": 0.6138392686843872, "rewards/simpleverify_reward/std": 0.48714008927345276, "step": 1557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3892.0, "completions/mean_length": 699.5692138671875, "completions/mean_terminated_length": 625.986328125, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.14541296980090884, "grad_norm": 0.12252545356750488, "learning_rate": 2e-07, "loss": 0.029, "num_tokens": 1006488087.0, "reward": 0.5725446939468384, "reward_std": 0.19756774604320526, "rewards/simpleverify_reward/mean": 0.5725446343421936, "rewards/simpleverify_reward/std": 0.49498558044433594, "step": 1558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3828.0, "completions/mean_length": 628.0111694335938, "completions/mean_terminated_length": 584.90625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.1455063029009094, "grad_norm": 0.13765843212604523, "learning_rate": 2e-07, "loss": 0.0403, "num_tokens": 1007142481.0, "reward": 0.5703125, "reward_std": 0.19106994569301605, "rewards/simpleverify_reward/mean": 0.5703125, "rewards/simpleverify_reward/std": 0.49530795216560364, "step": 1559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3387.0, "completions/mean_length": 628.4006958007812, "completions/mean_terminated_length": 561.3367309570312, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.14559963600091, "grad_norm": 0.13918465375900269, "learning_rate": 2e-07, "loss": 0.0176, "num_tokens": 1007802336.0, "reward": 0.582589328289032, "reward_std": 0.19787265360355377, "rewards/simpleverify_reward/mean": 0.5825892686843872, "rewards/simpleverify_reward/std": 0.4934072494506836, "step": 1560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3843.0, "completions/mean_length": 644.9107666015625, "completions/mean_terminated_length": 582.1636352539062, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.1456929691009106, "grad_norm": 0.12152666598558426, "learning_rate": 2e-07, "loss": 0.0362, "num_tokens": 1008466656.0, "reward": 0.5881696939468384, "reward_std": 0.18073216080665588, "rewards/simpleverify_reward/mean": 0.5881696343421936, "rewards/simpleverify_reward/std": 0.4924395978450775, "step": 1561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3562.0, "completions/mean_length": 594.4140625, "completions/mean_terminated_length": 550.8915405273438, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.14578630220091116, "grad_norm": 0.13233880698680878, "learning_rate": 2e-07, "loss": 0.0309, "num_tokens": 1009087603.0, "reward": 0.640625, "reward_std": 0.19268713891506195, "rewards/simpleverify_reward/mean": 0.640625, "rewards/simpleverify_reward/std": 0.48008525371551514, "step": 1562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2270.0, "completions/mean_length": 632.1127319335938, "completions/mean_terminated_length": 604.8380737304688, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.14587963530091175, "grad_norm": 0.11546596884727478, "learning_rate": 2e-07, "loss": 0.0147, "num_tokens": 1009750728.0, "reward": 0.5837053656578064, "reward_std": 0.16792230308055878, "rewards/simpleverify_reward/mean": 0.5837053656578064, "rewards/simpleverify_reward/std": 0.49321892857551575, "step": 1563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3846.0, "completions/mean_length": 652.1741333007812, "completions/mean_terminated_length": 593.5391845703125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.14597296840091234, "grad_norm": 0.12110535800457001, "learning_rate": 2e-07, "loss": 0.0231, "num_tokens": 1010425892.0, "reward": 0.6238839626312256, "reward_std": 0.18948553502559662, "rewards/simpleverify_reward/mean": 0.6238839030265808, "rewards/simpleverify_reward/std": 0.48468026518821716, "step": 1564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2660.0, "completions/mean_length": 617.9263916015625, "completions/mean_terminated_length": 578.6704711914062, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.1460663015009129, "grad_norm": 0.13501721620559692, "learning_rate": 2e-07, "loss": 0.0407, "num_tokens": 1011063482.0, "reward": 0.6629464626312256, "reward_std": 0.1932830661535263, "rewards/simpleverify_reward/mean": 0.6629464030265808, "rewards/simpleverify_reward/std": 0.47296738624572754, "step": 1565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2867.0, "completions/mean_length": 649.810302734375, "completions/mean_terminated_length": 610.9142456054688, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.1461596346009135, "grad_norm": 0.12987296283245087, "learning_rate": 2e-07, "loss": 0.0313, "num_tokens": 1011743896.0, "reward": 0.6595982313156128, "reward_std": 0.1965920329093933, "rewards/simpleverify_reward/mean": 0.6595982313156128, "rewards/simpleverify_reward/std": 0.4741089344024658, "step": 1566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 595.6205444335938, "completions/mean_terminated_length": 556.1128540039062, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.1462529677009141, "grad_norm": 0.1405174434185028, "learning_rate": 2e-07, "loss": 0.0246, "num_tokens": 1012359108.0, "reward": 0.668526828289032, "reward_std": 0.1918615996837616, "rewards/simpleverify_reward/mean": 0.6685267686843872, "rewards/simpleverify_reward/std": 0.4710056483745575, "step": 1567 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3255.0, "completions/mean_length": 746.044677734375, "completions/mean_terminated_length": 669.5616455078125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.14634630080091465, "grad_norm": 0.11021227389574051, "learning_rate": 2e-07, "loss": 0.0388, "num_tokens": 1013110604.0, "reward": 0.5613839626312256, "reward_std": 0.1926429718732834, "rewards/simpleverify_reward/mean": 0.5613839030265808, "rewards/simpleverify_reward/std": 0.496494859457016, "step": 1568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2464.0, "completions/mean_length": 589.8136596679688, "completions/mean_terminated_length": 566.1763916015625, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.14643963390091524, "grad_norm": 0.13414941728115082, "learning_rate": 2e-07, "loss": 0.0047, "num_tokens": 1013717317.0, "reward": 0.609375, "reward_std": 0.1924593597650528, "rewards/simpleverify_reward/mean": 0.609375, "rewards/simpleverify_reward/std": 0.48816296458244324, "step": 1569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2648.0, "completions/mean_length": 630.4710083007812, "completions/mean_terminated_length": 603.183349609375, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.14653296700091584, "grad_norm": 0.12901617586612701, "learning_rate": 2e-07, "loss": 0.0011, "num_tokens": 1014367907.0, "reward": 0.5848214626312256, "reward_std": 0.16259412467479706, "rewards/simpleverify_reward/mean": 0.5848214030265808, "rewards/simpleverify_reward/std": 0.49302801489830017, "step": 1570 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4037.0, "completions/mean_length": 591.380615234375, "completions/mean_terminated_length": 551.8250732421875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.1466263001009164, "grad_norm": 0.12841719388961792, "learning_rate": 2e-07, "loss": 0.0281, "num_tokens": 1014980864.0, "reward": 0.6741071939468384, "reward_std": 0.18607808649539948, "rewards/simpleverify_reward/mean": 0.6741071343421936, "rewards/simpleverify_reward/std": 0.4689692556858063, "step": 1571 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 1840.0, "completions/mean_length": 527.2980346679688, "completions/mean_terminated_length": 507.2716369628906, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.146719633200917, "grad_norm": 0.13655030727386475, "learning_rate": 2e-07, "loss": 0.0142, "num_tokens": 1015547875.0, "reward": 0.6506696939468384, "reward_std": 0.16781355440616608, "rewards/simpleverify_reward/mean": 0.6506696343421936, "rewards/simpleverify_reward/std": 0.47702476382255554, "step": 1572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3934.0, "completions/mean_length": 658.3449096679688, "completions/mean_terminated_length": 611.6798706054688, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.14681296630091759, "grad_norm": 0.12039344757795334, "learning_rate": 2e-07, "loss": 0.0053, "num_tokens": 1016225912.0, "reward": 0.5770089626312256, "reward_std": 0.16720622777938843, "rewards/simpleverify_reward/mean": 0.5770089030265808, "rewards/simpleverify_reward/std": 0.4943099617958069, "step": 1573 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2566.0, "completions/mean_length": 558.3203125, "completions/mean_terminated_length": 538.468017578125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.14690629940091818, "grad_norm": 0.13917063176631927, "learning_rate": 2e-07, "loss": 0.007, "num_tokens": 1016813439.0, "reward": 0.6707589626312256, "reward_std": 0.19441284239292145, "rewards/simpleverify_reward/mean": 0.6707589030265808, "rewards/simpleverify_reward/std": 0.4702001214027405, "step": 1574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2450.0, "completions/mean_length": 610.310302734375, "completions/mean_terminated_length": 570.9683837890625, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.14699963250091874, "grad_norm": 0.1279652714729309, "learning_rate": 2e-07, "loss": 0.0192, "num_tokens": 1017439733.0, "reward": 0.645089328289032, "reward_std": 0.16747651994228363, "rewards/simpleverify_reward/mean": 0.6450892686843872, "rewards/simpleverify_reward/std": 0.4787535071372986, "step": 1575 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3930.0, "completions/mean_length": 629.5457763671875, "completions/mean_terminated_length": 574.522705078125, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.14709296560091933, "grad_norm": 0.15224617719650269, "learning_rate": 2e-07, "loss": 0.0174, "num_tokens": 1018099942.0, "reward": 0.5803571939468384, "reward_std": 0.2216162234544754, "rewards/simpleverify_reward/mean": 0.5803571343421936, "rewards/simpleverify_reward/std": 0.4937761425971985, "step": 1576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004464285714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2964.0, "completions/mean_length": 578.1171875, "completions/mean_terminated_length": 562.3419799804688, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.14718629870091993, "grad_norm": 0.13960593938827515, "learning_rate": 2e-07, "loss": 0.0091, "num_tokens": 1018702671.0, "reward": 0.6417410969734192, "reward_std": 0.21180973947048187, "rewards/simpleverify_reward/mean": 0.6417410969734192, "rewards/simpleverify_reward/std": 0.47975656390190125, "step": 1577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3407.0, "completions/mean_length": 656.671875, "completions/mean_terminated_length": 606.0362548828125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.1472796318009205, "grad_norm": 0.12254558503627777, "learning_rate": 2e-07, "loss": 0.0644, "num_tokens": 1019378673.0, "reward": 0.613839328289032, "reward_std": 0.1685328334569931, "rewards/simpleverify_reward/mean": 0.6138392686843872, "rewards/simpleverify_reward/std": 0.48714008927345276, "step": 1578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2808.0, "completions/mean_length": 656.1785888671875, "completions/mean_terminated_length": 585.6583251953125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.14737296490092108, "grad_norm": 0.13007818162441254, "learning_rate": 2e-07, "loss": 0.0612, "num_tokens": 1020059161.0, "reward": 0.6227678656578064, "reward_std": 0.1917407363653183, "rewards/simpleverify_reward/mean": 0.6227678656578064, "rewards/simpleverify_reward/std": 0.4849644601345062, "step": 1579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2112.0, "completions/mean_length": 587.6205444335938, "completions/mean_terminated_length": 563.9685668945312, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.14746629800092168, "grad_norm": 0.13092996180057526, "learning_rate": 2e-07, "loss": 0.0259, "num_tokens": 1020673325.0, "reward": 0.637276828289032, "reward_std": 0.21135328710079193, "rewards/simpleverify_reward/mean": 0.6372767686843872, "rewards/simpleverify_reward/std": 0.481054425239563, "step": 1580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3729.0, "completions/mean_length": 669.7410888671875, "completions/mean_terminated_length": 579.4730834960938, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.14755963110092224, "grad_norm": 0.11566300690174103, "learning_rate": 2e-07, "loss": 0.0467, "num_tokens": 1021368053.0, "reward": 0.6049107313156128, "reward_std": 0.16427703201770782, "rewards/simpleverify_reward/mean": 0.6049107313156128, "rewards/simpleverify_reward/std": 0.48914292454719543, "step": 1581 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3963.0, "completions/mean_length": 730.9631958007812, "completions/mean_terminated_length": 642.3081665039062, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.14765296420092283, "grad_norm": 0.11575949192047119, "learning_rate": 2e-07, "loss": 0.0265, "num_tokens": 1022102596.0, "reward": 0.6004464626312256, "reward_std": 0.1813708394765854, "rewards/simpleverify_reward/mean": 0.6004464030265808, "rewards/simpleverify_reward/std": 0.49008017778396606, "step": 1582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2408.0, "completions/mean_length": 597.3839721679688, "completions/mean_terminated_length": 549.8914184570312, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.14774629730092342, "grad_norm": 0.1206369698047638, "learning_rate": 2e-07, "loss": 0.0108, "num_tokens": 1022722732.0, "reward": 0.668526828289032, "reward_std": 0.14887316524982452, "rewards/simpleverify_reward/mean": 0.6685267686843872, "rewards/simpleverify_reward/std": 0.4710056483745575, "step": 1583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3939.0, "completions/mean_length": 702.6785888671875, "completions/mean_terminated_length": 629.1630249023438, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.147839630400924, "grad_norm": 0.13021576404571533, "learning_rate": 2e-07, "loss": 0.0321, "num_tokens": 1023441020.0, "reward": 0.5457589626312256, "reward_std": 0.21327398717403412, "rewards/simpleverify_reward/mean": 0.5457589030265808, "rewards/simpleverify_reward/std": 0.4981797933578491, "step": 1584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3230.0, "completions/mean_length": 614.7444458007812, "completions/mean_terminated_length": 575.45263671875, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.14793296350092458, "grad_norm": 0.1408184915781021, "learning_rate": 2e-07, "loss": 0.0183, "num_tokens": 1024094703.0, "reward": 0.621651828289032, "reward_std": 0.1847144216299057, "rewards/simpleverify_reward/mean": 0.6216517686843872, "rewards/simpleverify_reward/std": 0.485245943069458, "step": 1585 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3705.0, "completions/mean_length": 642.3928833007812, "completions/mean_terminated_length": 587.57373046875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.14802629660092517, "grad_norm": 0.12319440394639969, "learning_rate": 2e-07, "loss": 0.0219, "num_tokens": 1024760831.0, "reward": 0.621651828289032, "reward_std": 0.1626255065202713, "rewards/simpleverify_reward/mean": 0.6216517686843872, "rewards/simpleverify_reward/std": 0.4852459728717804, "step": 1586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2145.0, "completions/mean_length": 586.6819458007812, "completions/mean_terminated_length": 543.0632934570312, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.14811962970092574, "grad_norm": 0.13031105697155, "learning_rate": 2e-07, "loss": 0.0366, "num_tokens": 1025370274.0, "reward": 0.6049107313156128, "reward_std": 0.18652454018592834, "rewards/simpleverify_reward/mean": 0.6049107313156128, "rewards/simpleverify_reward/std": 0.48914292454719543, "step": 1587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3847.0, "completions/mean_length": 628.8471069335938, "completions/mean_terminated_length": 585.7525634765625, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.14821296280092633, "grad_norm": 0.14472901821136475, "learning_rate": 2e-07, "loss": 0.0487, "num_tokens": 1026022217.0, "reward": 0.6026785969734192, "reward_std": 0.23097974061965942, "rewards/simpleverify_reward/mean": 0.6026785969734192, "rewards/simpleverify_reward/std": 0.48961687088012695, "step": 1588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3724.0, "completions/mean_length": 634.5089721679688, "completions/mean_terminated_length": 599.3866577148438, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.14830629590092692, "grad_norm": 0.12318733334541321, "learning_rate": 2e-07, "loss": 0.0208, "num_tokens": 1026668801.0, "reward": 0.6004464626312256, "reward_std": 0.2051183432340622, "rewards/simpleverify_reward/mean": 0.6004464030265808, "rewards/simpleverify_reward/std": 0.49008017778396606, "step": 1589 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3898.0, "completions/mean_length": 601.7277221679688, "completions/mean_terminated_length": 554.2941284179688, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.14839962900092749, "grad_norm": 0.12236359715461731, "learning_rate": 2e-07, "loss": 0.0561, "num_tokens": 1027285085.0, "reward": 0.645089328289032, "reward_std": 0.18656803667545319, "rewards/simpleverify_reward/mean": 0.6450892686843872, "rewards/simpleverify_reward/std": 0.4787535071372986, "step": 1590 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2610.0, "completions/mean_length": 582.513427734375, "completions/mean_terminated_length": 534.8190307617188, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.14849296210092808, "grad_norm": 0.13722892105579376, "learning_rate": 2e-07, "loss": 0.0355, "num_tokens": 1027904209.0, "reward": 0.6116071939468384, "reward_std": 0.19136375188827515, "rewards/simpleverify_reward/mean": 0.6116071343421936, "rewards/simpleverify_reward/std": 0.48765692114830017, "step": 1591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4059.0, "completions/mean_length": 632.4442138671875, "completions/mean_terminated_length": 593.3521728515625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.14858629520092867, "grad_norm": 0.1349661499261856, "learning_rate": 2e-07, "loss": 0.0363, "num_tokens": 1028555575.0, "reward": 0.6261160969734192, "reward_std": 0.18028958141803741, "rewards/simpleverify_reward/mean": 0.6261160969734192, "rewards/simpleverify_reward/std": 0.48410359025001526, "step": 1592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2694.0, "completions/mean_length": 598.732177734375, "completions/mean_terminated_length": 547.2434692382812, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.14867962830092923, "grad_norm": 0.12440957874059677, "learning_rate": 2e-07, "loss": 0.0298, "num_tokens": 1029186823.0, "reward": 0.6104910969734192, "reward_std": 0.1510535031557083, "rewards/simpleverify_reward/mean": 0.6104910969734192, "rewards/simpleverify_reward/std": 0.48791128396987915, "step": 1593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4026.0, "completions/mean_length": 676.5256958007812, "completions/mean_terminated_length": 630.1074829101562, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.14877296140092983, "grad_norm": 0.12450714409351349, "learning_rate": 2e-07, "loss": 0.0465, "num_tokens": 1029877526.0, "reward": 0.6183035969734192, "reward_std": 0.20388048887252808, "rewards/simpleverify_reward/mean": 0.6183035969734192, "rewards/simpleverify_reward/std": 0.4860740303993225, "step": 1594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3881.0, "completions/mean_length": 648.396240234375, "completions/mean_terminated_length": 597.6387329101562, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.14886629450093042, "grad_norm": 0.12197700887918472, "learning_rate": 2e-07, "loss": 0.0177, "num_tokens": 1030547305.0, "reward": 0.5558035969734192, "reward_std": 0.18708978593349457, "rewards/simpleverify_reward/mean": 0.5558035969734192, "rewards/simpleverify_reward/std": 0.49715372920036316, "step": 1595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3815.0, "completions/mean_length": 605.5, "completions/mean_terminated_length": 546.0703735351562, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.148959627600931, "grad_norm": 0.12710323929786682, "learning_rate": 2e-07, "loss": 0.0364, "num_tokens": 1031173729.0, "reward": 0.6651785969734192, "reward_std": 0.17930814623832703, "rewards/simpleverify_reward/mean": 0.6651785969734192, "rewards/simpleverify_reward/std": 0.47219160199165344, "step": 1596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3659.0, "completions/mean_length": 639.2332763671875, "completions/mean_terminated_length": 600.2178344726562, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.14905296070093157, "grad_norm": 0.13930092751979828, "learning_rate": 2e-07, "loss": 0.0378, "num_tokens": 1031836410.0, "reward": 0.6037946939468384, "reward_std": 0.18344563245773315, "rewards/simpleverify_reward/mean": 0.6037946343421936, "rewards/simpleverify_reward/std": 0.48938122391700745, "step": 1597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3207.0, "completions/mean_length": 602.5904541015625, "completions/mean_terminated_length": 563.1614379882812, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.14914629380093217, "grad_norm": 0.12097050249576569, "learning_rate": 2e-07, "loss": 0.0388, "num_tokens": 1032461595.0, "reward": 0.6339285969734192, "reward_std": 0.15244035422801971, "rewards/simpleverify_reward/mean": 0.6339285969734192, "rewards/simpleverify_reward/std": 0.48199835419654846, "step": 1598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2267.0, "completions/mean_length": 603.435302734375, "completions/mean_terminated_length": 560.0248413085938, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.14923962690093276, "grad_norm": 0.12528812885284424, "learning_rate": 2e-07, "loss": 0.0265, "num_tokens": 1033082609.0, "reward": 0.6339285969734192, "reward_std": 0.15849418938159943, "rewards/simpleverify_reward/mean": 0.6339285969734192, "rewards/simpleverify_reward/std": 0.48199835419654846, "step": 1599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3409.0, "completions/mean_length": 602.8092041015625, "completions/mean_terminated_length": 571.3389892578125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.14933296000093332, "grad_norm": 0.11689667403697968, "learning_rate": 2e-07, "loss": 0.0307, "num_tokens": 1033719638.0, "reward": 0.6417410969734192, "reward_std": 0.1521058827638626, "rewards/simpleverify_reward/mean": 0.6417410969734192, "rewards/simpleverify_reward/std": 0.47975656390190125, "step": 1600 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2653.0, "completions/mean_length": 653.8092041015625, "completions/mean_terminated_length": 607.0825805664062, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.14942629310093392, "frac_reward_zero_std": 0.6071428656578064, "grad_norm": 0.12178564816713333, "learning_rate": 2e-07, "loss": 0.0209, "num_tokens": 1034400635.0, "reward": 0.5680803656578064, "reward_std": 0.16954627633094788, "rewards/simpleverify_reward/mean": 0.5680803656578064, "rewards/simpleverify_reward/std": 0.4956200420856476, "step": 1601 }, { "epoch": 0.14942629310093392, "step": 1601, "total_flos": 0.0, "train_loss": 1.3027437901511778e-05, "train_runtime": 176.4542, "train_samples_per_second": 8124.487, "train_steps_per_second": 9.068 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 1034400635, "num_train_epochs": 1, "save_steps": 160, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }