{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 14.951603498542275, "eval_steps": 500, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2283.0, "completions/mean_length": 607.34375, "completions/mean_terminated_length": 535.8223266601562, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 0.009329446064139942, "grad_norm": 0.18171170353889465, "learning_rate": 1e-06, "loss": -0.0102, "num_tokens": 556956.0, "reward": 0.5368303656578064, "reward_std": 0.27554163336753845, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 1 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3530.0, "completions/mean_length": 591.0435791015625, "completions/mean_terminated_length": 539.441650390625, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 0.018658892128279883, "grad_norm": 0.14002105593681335, "learning_rate": 1e-06, "loss": 0.0057, "num_tokens": 1120539.0, "reward": 0.4587053656578064, "reward_std": 0.23826707899570465, "rewards/verify_math_reward/mean": 0.4587053656578064, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 2 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2046.0, "completions/mean_length": 570.7991333007812, "completions/mean_terminated_length": 531.0112915039062, "completions/min_length": 67.0, "completions/min_terminated_length": 67.0, "epoch": 0.027988338192419825, "grad_norm": 0.14252738654613495, "learning_rate": 1e-06, "loss": 0.004, "num_tokens": 1689559.0, "reward": 0.504464328289032, "reward_std": 0.23642486333847046, "rewards/verify_math_reward/mean": 0.5044642686843872, "rewards/verify_math_reward/std": 0.5002593398094177, "step": 3 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1987.0, "completions/mean_length": 552.765625, "completions/mean_terminated_length": 520.8446044921875, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 0.037317784256559766, "grad_norm": 0.15027225017547607, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 2231501.0, "reward": 0.5457589626312256, "reward_std": 0.2344599962234497, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 4 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3696.0, "completions/mean_length": 605.4364013671875, "completions/mean_terminated_length": 550.0306396484375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.04664723032069971, "grad_norm": 0.16336442530155182, "learning_rate": 1e-06, "loss": 0.0162, "num_tokens": 2800196.0, "reward": 0.4977678656578064, "reward_std": 0.25547540187835693, "rewards/verify_math_reward/mean": 0.4977678656578064, "rewards/verify_math_reward/std": 0.5002743005752563, "step": 5 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3311.0, "completions/mean_length": 582.796875, "completions/mean_terminated_length": 551.1464233398438, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.05597667638483965, "grad_norm": 0.14060573279857635, "learning_rate": 1e-06, "loss": 0.0086, "num_tokens": 3380038.0, "reward": 0.5145089626312256, "reward_std": 0.245405375957489, "rewards/verify_math_reward/mean": 0.5145089030265808, "rewards/verify_math_reward/std": 0.5000685453414917, "step": 6 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3680.0, "completions/mean_length": 562.1138916015625, "completions/mean_terminated_length": 526.2570190429688, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 0.0653061224489796, "grad_norm": 0.15395274758338928, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 3945284.0, "reward": 0.535714328289032, "reward_std": 0.23634566366672516, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 7 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3486.0, "completions/mean_length": 633.5502319335938, "completions/mean_terminated_length": 578.5906982421875, "completions/min_length": 73.0, "completions/min_terminated_length": 73.0, "epoch": 0.07463556851311953, "grad_norm": 0.12867768108844757, "learning_rate": 1e-06, "loss": 0.0068, "num_tokens": 4529993.0, "reward": 0.5089285969734192, "reward_std": 0.23856060206890106, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 8 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3523.0, "completions/mean_length": 651.9174194335938, "completions/mean_terminated_length": 573.2853393554688, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.08396501457725948, "grad_norm": 0.12435610592365265, "learning_rate": 1e-06, "loss": 0.0116, "num_tokens": 5121167.0, "reward": 0.5290178656578064, "reward_std": 0.2126762568950653, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 9 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2807.0, "completions/mean_length": 589.6484375, "completions/mean_terminated_length": 550.0733642578125, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.09329446064139942, "grad_norm": 0.13147082924842834, "learning_rate": 1e-06, "loss": -0.0089, "num_tokens": 5703772.0, "reward": 0.5189732313156128, "reward_std": 0.21905823051929474, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 10 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2577.0, "completions/mean_length": 601.9185791015625, "completions/mean_terminated_length": 574.4061279296875, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.10262390670553936, "grad_norm": 0.11295190453529358, "learning_rate": 1e-06, "loss": 0.0141, "num_tokens": 6298763.0, "reward": 0.4899553656578064, "reward_std": 0.1918206363916397, "rewards/verify_math_reward/mean": 0.4899553656578064, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 11 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3135.0, "completions/mean_length": 600.5826416015625, "completions/mean_terminated_length": 528.9225463867188, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.1119533527696793, "grad_norm": 0.12936817109584808, "learning_rate": 1e-06, "loss": 0.0068, "num_tokens": 6865637.0, "reward": 0.5368303656578064, "reward_std": 0.20336057245731354, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 12 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2718.0, "completions/mean_length": 603.872802734375, "completions/mean_terminated_length": 576.375732421875, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.12128279883381925, "grad_norm": 0.11916936188936234, "learning_rate": 1e-06, "loss": 0.014, "num_tokens": 7466027.0, "reward": 0.5736607313156128, "reward_std": 0.21582452952861786, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 13 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2803.0, "completions/mean_length": 567.6171875, "completions/mean_terminated_length": 527.79345703125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.1306122448979592, "grad_norm": 0.12818068265914917, "learning_rate": 1e-06, "loss": 0.0052, "num_tokens": 8029764.0, "reward": 0.5680803656578064, "reward_std": 0.22064122557640076, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 14 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3235.0, "completions/mean_length": 637.6975708007812, "completions/mean_terminated_length": 570.8134155273438, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.13994169096209913, "grad_norm": 0.1150139793753624, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 8624973.0, "reward": 0.5569196939468384, "reward_std": 0.1802103966474533, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 15 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3444.0, "completions/mean_length": 630.1038208007812, "completions/mean_terminated_length": 563.0728149414062, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.14927113702623906, "grad_norm": 0.12789832055568695, "learning_rate": 1e-06, "loss": 0.0077, "num_tokens": 9211378.0, "reward": 0.5424107313156128, "reward_std": 0.21835143864154816, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 16 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 569.8939819335938, "completions/mean_terminated_length": 550.1066284179688, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.158600583090379, "grad_norm": 0.13087671995162964, "learning_rate": 1e-06, "loss": 0.0131, "num_tokens": 9787147.0, "reward": 0.5647321939468384, "reward_std": 0.2232327163219452, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 17 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3330.0, "completions/mean_length": 656.6607666015625, "completions/mean_terminated_length": 570.0869140625, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 0.16793002915451896, "grad_norm": 0.12154269218444824, "learning_rate": 1e-06, "loss": 0.0173, "num_tokens": 10366635.0, "reward": 0.5267857313156128, "reward_std": 0.22320063412189484, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 18 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3149.0, "completions/mean_length": 601.3092041015625, "completions/mean_terminated_length": 529.6640014648438, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "epoch": 0.1772594752186589, "grad_norm": 0.13360120356082916, "learning_rate": 1e-06, "loss": -0.0023, "num_tokens": 10926136.0, "reward": 0.5892857313156128, "reward_std": 0.25400978326797485, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 19 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3689.0, "completions/mean_length": 558.7589721679688, "completions/mean_terminated_length": 538.9091186523438, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 0.18658892128279883, "grad_norm": 0.12295672297477722, "learning_rate": 1e-06, "loss": 0.007, "num_tokens": 11497472.0, "reward": 0.6350446939468384, "reward_std": 0.21098628640174866, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 20 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3967.0, "completions/mean_length": 580.6529541015625, "completions/mean_terminated_length": 528.8980712890625, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 0.19591836734693877, "grad_norm": 0.13105490803718567, "learning_rate": 1e-06, "loss": 0.0106, "num_tokens": 12046681.0, "reward": 0.5814732313156128, "reward_std": 0.20046527683734894, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935930073261261, "step": 21 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3375.0, "completions/mean_length": 636.5267944335938, "completions/mean_terminated_length": 593.5277099609375, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.20524781341107873, "grad_norm": 0.12085764110088348, "learning_rate": 1e-06, "loss": 0.0143, "num_tokens": 12667385.0, "reward": 0.5245535969734192, "reward_std": 0.220902681350708, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 22 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3876.0, "completions/mean_length": 608.3582763671875, "completions/mean_terminated_length": 552.9989013671875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.21457725947521866, "grad_norm": 0.1213921383023262, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 13239466.0, "reward": 0.6328125, "reward_std": 0.18021151423454285, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 23 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2871.0, "completions/mean_length": 634.1551513671875, "completions/mean_terminated_length": 579.2052001953125, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.2239067055393586, "grad_norm": 0.11899662017822266, "learning_rate": 1e-06, "loss": 0.0154, "num_tokens": 13834621.0, "reward": 0.5290178656578064, "reward_std": 0.2045544981956482, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943602085113525, "step": 24 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2828.0, "completions/mean_length": 634.7824096679688, "completions/mean_terminated_length": 587.7975463867188, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.23323615160349853, "grad_norm": 0.11786917597055435, "learning_rate": 1e-06, "loss": 0.0117, "num_tokens": 14439466.0, "reward": 0.5424107313156128, "reward_std": 0.19234946370124817, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 25 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3850.0, "completions/mean_length": 616.411865234375, "completions/mean_terminated_length": 569.1776123046875, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.2425655976676385, "grad_norm": 0.12758877873420715, "learning_rate": 1e-06, "loss": 0.0069, "num_tokens": 15030091.0, "reward": 0.5111607313156128, "reward_std": 0.1912982016801834, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 26 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 1912.0, "completions/mean_length": 587.1652221679688, "completions/mean_terminated_length": 543.5525512695312, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.2518950437317784, "grad_norm": 0.13884863257408142, "learning_rate": 1e-06, "loss": 0.0073, "num_tokens": 15599255.0, "reward": 0.5636160969734192, "reward_std": 0.22218075394630432, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 27 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3205.0, "completions/mean_length": 576.9386596679688, "completions/mean_terminated_length": 545.2353515625, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.2612244897959184, "grad_norm": 0.13604635000228882, "learning_rate": 1e-06, "loss": 0.0071, "num_tokens": 16173416.0, "reward": 0.5379464626312256, "reward_std": 0.2260870635509491, "rewards/verify_math_reward/mean": 0.5379464030265808, "rewards/verify_math_reward/std": 0.4988364577293396, "step": 28 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3413.0, "completions/mean_length": 643.078125, "completions/mean_terminated_length": 564.2442626953125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.2705539358600583, "grad_norm": 0.12634027004241943, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 16756678.0, "reward": 0.4910714626312256, "reward_std": 0.19989962875843048, "rewards/verify_math_reward/mean": 0.4910714328289032, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 29 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3281.0, "completions/mean_length": 587.6339721679688, "completions/mean_terminated_length": 548.0361328125, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.27988338192419826, "grad_norm": 0.12438934296369553, "learning_rate": 1e-06, "loss": 0.025, "num_tokens": 17337566.0, "reward": 0.5412946939468384, "reward_std": 0.22138941287994385, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 30 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2446.0, "completions/mean_length": 660.9810791015625, "completions/mean_terminated_length": 614.351806640625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.2892128279883382, "grad_norm": 0.11914543807506561, "learning_rate": 1e-06, "loss": 0.0042, "num_tokens": 17965533.0, "reward": 0.5457589626312256, "reward_std": 0.21530599892139435, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 31 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3930.0, "completions/mean_length": 568.6819458007812, "completions/mean_terminated_length": 544.9022827148438, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.29854227405247813, "grad_norm": 0.13532161712646484, "learning_rate": 1e-06, "loss": 0.0072, "num_tokens": 18541336.0, "reward": 0.5736607313156128, "reward_std": 0.2328890562057495, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 32 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2846.0, "completions/mean_length": 557.8928833007812, "completions/mean_terminated_length": 517.9593505859375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.30787172011661806, "grad_norm": 0.1307281255722046, "learning_rate": 1e-06, "loss": 0.0152, "num_tokens": 19086784.0, "reward": 0.5334821939468384, "reward_std": 0.20485760271549225, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 33 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4079.0, "completions/mean_length": 610.513427734375, "completions/mean_terminated_length": 571.173828125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.317201166180758, "grad_norm": 0.13105987012386322, "learning_rate": 1e-06, "loss": 0.0043, "num_tokens": 19676236.0, "reward": 0.5390625, "reward_std": 0.2295122593641281, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 34 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3472.0, "completions/mean_length": 640.3928833007812, "completions/mean_terminated_length": 573.5608520507812, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.32653061224489793, "grad_norm": 0.10946747660636902, "learning_rate": 1e-06, "loss": -0.0098, "num_tokens": 20264052.0, "reward": 0.5714285969734192, "reward_std": 0.16145730018615723, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 35 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2592.0, "completions/mean_length": 659.5335083007812, "completions/mean_terminated_length": 593.0716552734375, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 0.3358600583090379, "grad_norm": 0.1137261837720871, "learning_rate": 1e-06, "loss": 0.0114, "num_tokens": 20878186.0, "reward": 0.5625, "reward_std": 0.20282670855522156, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 36 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3530.0, "completions/mean_length": 612.9152221679688, "completions/mean_terminated_length": 545.5517578125, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.34518950437317786, "grad_norm": 0.1323130577802658, "learning_rate": 1e-06, "loss": 0.0043, "num_tokens": 21434454.0, "reward": 0.559151828289032, "reward_std": 0.20256778597831726, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 37 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3922.0, "completions/mean_length": 625.1027221679688, "completions/mean_terminated_length": 566.0068359375, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.3545189504373178, "grad_norm": 0.12597277760505676, "learning_rate": 1e-06, "loss": 0.0033, "num_tokens": 22022442.0, "reward": 0.546875, "reward_std": 0.19915145635604858, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 38 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2314.0, "completions/mean_length": 588.6495971679688, "completions/mean_terminated_length": 545.0553588867188, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.3638483965014577, "grad_norm": 0.13477082550525665, "learning_rate": 1e-06, "loss": 0.003, "num_tokens": 22600680.0, "reward": 0.5602678656578064, "reward_std": 0.21226690709590912, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317117214203, "step": 39 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2219.0, "completions/mean_length": 589.9631958007812, "completions/mean_terminated_length": 554.388916015625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.37317784256559766, "grad_norm": 0.12254554778337479, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 23194775.0, "reward": 0.520089328289032, "reward_std": 0.19366033375263214, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 40 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2182.0, "completions/mean_length": 680.0234375, "completions/mean_terminated_length": 590.0263671875, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.3825072886297376, "grad_norm": 0.1197601780295372, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 23811652.0, "reward": 0.4977678656578064, "reward_std": 0.20985764265060425, "rewards/verify_math_reward/mean": 0.4977678656578064, "rewards/verify_math_reward/std": 0.5002742409706116, "step": 41 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3504.0, "completions/mean_length": 616.5502319335938, "completions/mean_terminated_length": 553.2874755859375, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.39183673469387753, "grad_norm": 0.11636195331811905, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 24389745.0, "reward": 0.512276828289032, "reward_std": 0.18678346276283264, "rewards/verify_math_reward/mean": 0.5122767686843872, "rewards/verify_math_reward/std": 0.500128448009491, "step": 42 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2795.0, "completions/mean_length": 610.9486694335938, "completions/mean_terminated_length": 555.63037109375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.40116618075801747, "grad_norm": 0.1344902515411377, "learning_rate": 1e-06, "loss": 0.0045, "num_tokens": 24965947.0, "reward": 0.5613839626312256, "reward_std": 0.2333090901374817, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 43 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2116.0, "completions/mean_length": 544.6819458007812, "completions/mean_terminated_length": 508.6482238769531, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.41049562682215746, "grad_norm": 0.12748591601848602, "learning_rate": 1e-06, "loss": 0.0113, "num_tokens": 25503310.0, "reward": 0.6004464626312256, "reward_std": 0.20628975331783295, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 44 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2274.0, "completions/mean_length": 618.0100708007812, "completions/mean_terminated_length": 570.7975463867188, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.4198250728862974, "grad_norm": 0.1190551146864891, "learning_rate": 1e-06, "loss": 0.0086, "num_tokens": 26101287.0, "reward": 0.5736607313156128, "reward_std": 0.20565037429332733, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 45 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2312.0, "completions/mean_length": 644.5792846679688, "completions/mean_terminated_length": 609.5591430664062, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.4291545189504373, "grad_norm": 0.11842440068721771, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 26730414.0, "reward": 0.5479910969734192, "reward_std": 0.23724929988384247, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 46 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3814.0, "completions/mean_length": 678.6328125, "completions/mean_terminated_length": 592.6121215820312, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.43848396501457726, "grad_norm": 0.12601913511753082, "learning_rate": 1e-06, "loss": 0.0027, "num_tokens": 27333253.0, "reward": 0.5334821939468384, "reward_std": 0.23067805171012878, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 47 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3334.0, "completions/mean_length": 610.5089721679688, "completions/mean_terminated_length": 543.0989379882812, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.4478134110787172, "grad_norm": 0.12337189167737961, "learning_rate": 1e-06, "loss": -0.0083, "num_tokens": 27905285.0, "reward": 0.543526828289032, "reward_std": 0.16296179592609406, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 48 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2660.0, "completions/mean_length": 612.9564819335938, "completions/mean_terminated_length": 561.67724609375, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 0.45714285714285713, "grad_norm": 0.11862468719482422, "learning_rate": 1e-06, "loss": 0.0058, "num_tokens": 28484262.0, "reward": 0.5111607313156128, "reward_std": 0.1951705813407898, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 49 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4030.0, "completions/mean_length": 694.6563110351562, "completions/mean_terminated_length": 576.8267822265625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.46647230320699706, "grad_norm": 0.13872234523296356, "learning_rate": 1e-06, "loss": -0.0054, "num_tokens": 29086330.0, "reward": 0.527901828289032, "reward_std": 0.22931794822216034, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 50 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3270.0, "completions/mean_length": 643.7589721679688, "completions/mean_terminated_length": 568.9669189453125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.47580174927113705, "grad_norm": 0.12871721386909485, "learning_rate": 1e-06, "loss": 0.0112, "num_tokens": 29666522.0, "reward": 0.598214328289032, "reward_std": 0.21057121455669403, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 51 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3218.0, "completions/mean_length": 611.7120971679688, "completions/mean_terminated_length": 544.3253784179688, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.485131195335277, "grad_norm": 0.12275160849094391, "learning_rate": 1e-06, "loss": 0.0077, "num_tokens": 30224672.0, "reward": 0.621651828289032, "reward_std": 0.2090653032064438, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 52 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3963.0, "completions/mean_length": 617.1171875, "completions/mean_terminated_length": 549.8350219726562, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.4944606413994169, "grad_norm": 0.12363526970148087, "learning_rate": 1e-06, "loss": -0.012, "num_tokens": 30793729.0, "reward": 0.5680803656578064, "reward_std": 0.20000769197940826, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 53 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4037.0, "completions/mean_length": 544.0301513671875, "completions/mean_terminated_length": 512.0303955078125, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.5037900874635568, "grad_norm": 0.1251416653394699, "learning_rate": 1e-06, "loss": 0.0105, "num_tokens": 31338252.0, "reward": 0.6037946939468384, "reward_std": 0.19257515668869019, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 54 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3845.0, "completions/mean_length": 544.0301513671875, "completions/mean_terminated_length": 516.0618896484375, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.5131195335276968, "grad_norm": 0.14017212390899658, "learning_rate": 1e-06, "loss": 0.0266, "num_tokens": 31891423.0, "reward": 0.5892857313156128, "reward_std": 0.21756118535995483, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 55 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2310.0, "completions/mean_length": 613.091552734375, "completions/mean_terminated_length": 545.7315063476562, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.5224489795918368, "grad_norm": 0.1301651895046234, "learning_rate": 1e-06, "loss": 0.0095, "num_tokens": 32462529.0, "reward": 0.4966517984867096, "reward_std": 0.20038999617099762, "rewards/verify_math_reward/mean": 0.4966517984867096, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 56 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3428.0, "completions/mean_length": 555.2567138671875, "completions/mean_terminated_length": 527.3768310546875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.5317784256559767, "grad_norm": 0.13453331589698792, "learning_rate": 1e-06, "loss": 0.0261, "num_tokens": 33018119.0, "reward": 0.5625, "reward_std": 0.19399915635585785, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 57 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3956.0, "completions/mean_length": 577.25, "completions/mean_terminated_length": 533.51416015625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.5411078717201167, "grad_norm": 0.11944576352834702, "learning_rate": 1e-06, "loss": 0.0122, "num_tokens": 33580143.0, "reward": 0.566964328289032, "reward_std": 0.18291178345680237, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 58 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3082.0, "completions/mean_length": 566.3058471679688, "completions/mean_terminated_length": 518.3914184570312, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.5504373177842565, "grad_norm": 0.1279263198375702, "learning_rate": 1e-06, "loss": 0.0063, "num_tokens": 34132545.0, "reward": 0.5881696939468384, "reward_std": 0.17480847239494324, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924396276473999, "step": 59 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3234.0, "completions/mean_length": 564.146240234375, "completions/mean_terminated_length": 532.3276977539062, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.5597667638483965, "grad_norm": 0.12249712646007538, "learning_rate": 1e-06, "loss": 0.0118, "num_tokens": 34690260.0, "reward": 0.598214328289032, "reward_std": 0.15969882905483246, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053290486335754, "step": 60 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2483.0, "completions/mean_length": 660.6473388671875, "completions/mean_terminated_length": 590.2186889648438, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.5690962099125364, "grad_norm": 0.11897089332342148, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 35292496.0, "reward": 0.5323660969734192, "reward_std": 0.19846788048744202, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 61 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2778.0, "completions/mean_length": 656.9498291015625, "completions/mean_terminated_length": 582.4435424804688, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.5784256559766764, "grad_norm": 0.1298826038837433, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 35887267.0, "reward": 0.5267857313156128, "reward_std": 0.23829957842826843, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608329772949, "step": 62 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3655.0, "completions/mean_length": 590.6328125, "completions/mean_terminated_length": 547.0632934570312, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 0.5877551020408164, "grad_norm": 0.13611359894275665, "learning_rate": 1e-06, "loss": 0.0117, "num_tokens": 36450074.0, "reward": 0.5915178656578064, "reward_std": 0.1956934630870819, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 63 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4028.0, "completions/mean_length": 636.396240234375, "completions/mean_terminated_length": 569.4868774414062, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.5970845481049563, "grad_norm": 0.13279348611831665, "learning_rate": 1e-06, "loss": 0.0047, "num_tokens": 37040421.0, "reward": 0.6272321939468384, "reward_std": 0.19043126702308655, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 611.6652221679688, "completions/mean_terminated_length": 576.3111572265625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.6064139941690962, "grad_norm": 0.12394732981920242, "learning_rate": 1e-06, "loss": 0.0235, "num_tokens": 37649289.0, "reward": 0.5334821939468384, "reward_std": 0.18341170251369476, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 65 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3913.0, "completions/mean_length": 571.1038208007812, "completions/mean_terminated_length": 527.2915649414062, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.6157434402332361, "grad_norm": 0.13319340348243713, "learning_rate": 1e-06, "loss": 0.008, "num_tokens": 38214478.0, "reward": 0.5926339626312256, "reward_std": 0.2032838761806488, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 66 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3479.0, "completions/mean_length": 696.8739013671875, "completions/mean_terminated_length": 619.2682495117188, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.6250728862973761, "grad_norm": 0.11537881940603256, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 38852629.0, "reward": 0.5178571939468384, "reward_std": 0.20324109494686127, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 67 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3630.0, "completions/mean_length": 590.625, "completions/mean_terminated_length": 555.0574951171875, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.634402332361516, "grad_norm": 0.12531507015228271, "learning_rate": 1e-06, "loss": 0.0086, "num_tokens": 39428589.0, "reward": 0.5524553656578064, "reward_std": 0.19242683053016663, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 68 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3846.0, "completions/mean_length": 674.203125, "completions/mean_terminated_length": 588.0709228515625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.643731778425656, "grad_norm": 0.12898904085159302, "learning_rate": 1e-06, "loss": 0.0107, "num_tokens": 40032931.0, "reward": 0.5491071939468384, "reward_std": 0.23394668102264404, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 69 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2719.0, "completions/mean_length": 611.7890625, "completions/mean_terminated_length": 572.4638671875, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.6530612244897959, "grad_norm": 0.13397973775863647, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 40630174.0, "reward": 0.5323660969734192, "reward_std": 0.2358924299478531, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 70 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3893.0, "completions/mean_length": 600.4498291015625, "completions/mean_terminated_length": 560.9966430664062, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.6623906705539359, "grad_norm": 0.13122980296611786, "learning_rate": 1e-06, "loss": 0.0221, "num_tokens": 41226801.0, "reward": 0.5368303656578064, "reward_std": 0.22053246200084686, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 71 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3377.0, "completions/mean_length": 663.4989013671875, "completions/mean_terminated_length": 581.1188354492188, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.6717201166180758, "grad_norm": 0.12445461750030518, "learning_rate": 1e-06, "loss": -0.0051, "num_tokens": 41832720.0, "reward": 0.5334821939468384, "reward_std": 0.2016706019639969, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 72 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2948.0, "completions/mean_length": 617.943115234375, "completions/mean_terminated_length": 562.73583984375, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.6810495626822157, "grad_norm": 0.11597079783678055, "learning_rate": 1e-06, "loss": 0.0058, "num_tokens": 42425829.0, "reward": 0.5558035969734192, "reward_std": 0.17191274464130402, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 73 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3584.0, "completions/mean_length": 697.904052734375, "completions/mean_terminated_length": 596.3517456054688, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.6903790087463557, "grad_norm": 0.11661992222070694, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 43034359.0, "reward": 0.5580357313156128, "reward_std": 0.20861980319023132, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 74 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3494.0, "completions/mean_length": 652.15625, "completions/mean_terminated_length": 577.546142578125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.6997084548104956, "grad_norm": 0.11767545342445374, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 43625931.0, "reward": 0.5245535969734192, "reward_std": 0.18878154456615448, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 75 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2363.0, "completions/mean_length": 672.21875, "completions/mean_terminated_length": 594.0502319335938, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.7090379008746356, "grad_norm": 0.13513846695423126, "learning_rate": 1e-06, "loss": 0.011, "num_tokens": 44239559.0, "reward": 0.535714328289032, "reward_std": 0.2354377806186676, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 76 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2531.0, "completions/mean_length": 601.7366333007812, "completions/mean_terminated_length": 542.242919921875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.7183673469387755, "grad_norm": 0.14092093706130981, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 44808899.0, "reward": 0.5580357313156128, "reward_std": 0.23581615090370178, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 77 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3195.0, "completions/mean_length": 643.294677734375, "completions/mean_terminated_length": 564.4657592773438, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.7276967930029155, "grad_norm": 0.13623066246509552, "learning_rate": 1e-06, "loss": 0.021, "num_tokens": 45389283.0, "reward": 0.5770089626312256, "reward_std": 0.2100435197353363, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 78 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4059.0, "completions/mean_length": 576.1495971679688, "completions/mean_terminated_length": 540.4351196289062, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.7370262390670554, "grad_norm": 0.126764714717865, "learning_rate": 1e-06, "loss": 0.0076, "num_tokens": 45957737.0, "reward": 0.590401828289032, "reward_std": 0.20973819494247437, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 79 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2471.0, "completions/mean_length": 566.1886596679688, "completions/mean_terminated_length": 522.3152465820312, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.7463556851311953, "grad_norm": 0.1301468461751938, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 46510034.0, "reward": 0.6127232313156128, "reward_std": 0.2131231427192688, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 80 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3503.0, "completions/mean_length": 674.513427734375, "completions/mean_terminated_length": 600.3876953125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.7556851311953353, "grad_norm": 0.1248849481344223, "learning_rate": 1e-06, "loss": 0.0033, "num_tokens": 47132390.0, "reward": 0.5580357313156128, "reward_std": 0.19696861505508423, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 81 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3460.0, "completions/mean_length": 651.6629638671875, "completions/mean_terminated_length": 560.918701171875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.7650145772594752, "grad_norm": 0.12517298758029938, "learning_rate": 1e-06, "loss": 0.0125, "num_tokens": 47716240.0, "reward": 0.546875, "reward_std": 0.18370595574378967, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 82 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2290.0, "completions/mean_length": 594.9397583007812, "completions/mean_terminated_length": 563.398681640625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.7743440233236152, "grad_norm": 0.10509128123521805, "learning_rate": 1e-06, "loss": -0.0038, "num_tokens": 48301754.0, "reward": 0.6439732313156128, "reward_std": 0.16037102043628693, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 83 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3104.0, "completions/mean_length": 653.5301513671875, "completions/mean_terminated_length": 570.9108276367188, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.7836734693877551, "grad_norm": 0.12554019689559937, "learning_rate": 1e-06, "loss": -0.0036, "num_tokens": 48896917.0, "reward": 0.4899553656578064, "reward_std": 0.21440306305885315, "rewards/verify_math_reward/mean": 0.4899553656578064, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 84 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3494.0, "completions/mean_length": 586.4754638671875, "completions/mean_terminated_length": 550.8657836914062, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 0.793002915451895, "grad_norm": 0.12394072115421295, "learning_rate": 1e-06, "loss": 0.0059, "num_tokens": 49475263.0, "reward": 0.5602678656578064, "reward_std": 0.21271198987960815, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 85 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2100.0, "completions/mean_length": 582.677490234375, "completions/mean_terminated_length": 555.0135498046875, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.8023323615160349, "grad_norm": 0.1227576732635498, "learning_rate": 1e-06, "loss": -0.0033, "num_tokens": 50055534.0, "reward": 0.6004464626312256, "reward_std": 0.1935526728630066, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 86 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3584.0, "completions/mean_length": 563.5546875, "completions/mean_terminated_length": 519.6486206054688, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.8116618075801749, "grad_norm": 0.13397268950939178, "learning_rate": 1e-06, "loss": 0.0165, "num_tokens": 50596087.0, "reward": 0.6160714626312256, "reward_std": 0.18411780893802643, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 87 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3707.0, "completions/mean_length": 590.1886596679688, "completions/mean_terminated_length": 538.5741577148438, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.8209912536443149, "grad_norm": 0.12742318212985992, "learning_rate": 1e-06, "loss": 0.0228, "num_tokens": 51151720.0, "reward": 0.6383928656578064, "reward_std": 0.1874246895313263, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 88 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2388.0, "completions/mean_length": 634.989990234375, "completions/mean_terminated_length": 580.0532836914062, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.8303206997084548, "grad_norm": 0.11321911215782166, "learning_rate": 1e-06, "loss": 0.0106, "num_tokens": 51765175.0, "reward": 0.5334821939468384, "reward_std": 0.1803976595401764, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 89 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 622.1741333007812, "completions/mean_terminated_length": 578.9966430664062, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.8396501457725948, "grad_norm": 0.11763538420200348, "learning_rate": 1e-06, "loss": 0.012, "num_tokens": 52365139.0, "reward": 0.5803571939468384, "reward_std": 0.1898646205663681, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 90 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3428.0, "completions/mean_length": 649.9017944335938, "completions/mean_terminated_length": 559.1111450195312, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.8489795918367347, "grad_norm": 0.13605010509490967, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 52941203.0, "reward": 0.6015625, "reward_std": 0.2129797637462616, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 91 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 624.7131958007812, "completions/mean_terminated_length": 581.5672607421875, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.8583090379008746, "grad_norm": 0.12498349696397781, "learning_rate": 1e-06, "loss": 0.0055, "num_tokens": 53542970.0, "reward": 0.6071428656578064, "reward_std": 0.2092207968235016, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 92 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3373.0, "completions/mean_length": 602.7600708007812, "completions/mean_terminated_length": 547.3118286132812, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.8676384839650145, "grad_norm": 0.1420130878686905, "learning_rate": 1e-06, "loss": 0.0202, "num_tokens": 54123235.0, "reward": 0.590401828289032, "reward_std": 0.2405879944562912, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 93 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3199.0, "completions/mean_length": 605.966552734375, "completions/mean_terminated_length": 574.5247802734375, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.8769679300291545, "grad_norm": 0.1178143247961998, "learning_rate": 1e-06, "loss": 0.0067, "num_tokens": 54710165.0, "reward": 0.6037946939468384, "reward_std": 0.1704389452934265, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938119411468506, "step": 94 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3513.0, "completions/mean_length": 654.724365234375, "completions/mean_terminated_length": 600.1008911132812, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.8862973760932945, "grad_norm": 0.12943097949028015, "learning_rate": 1e-06, "loss": -0.0036, "num_tokens": 55326950.0, "reward": 0.5457589626312256, "reward_std": 0.22068330645561218, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981798231601715, "step": 95 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2576.0, "completions/mean_length": 638.4888916015625, "completions/mean_terminated_length": 583.6077270507812, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.8956268221574344, "grad_norm": 0.13328658044338226, "learning_rate": 1e-06, "loss": 0.007, "num_tokens": 55925148.0, "reward": 0.5725446939468384, "reward_std": 0.23788981139659882, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 96 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2864.0, "completions/mean_length": 639.8125, "completions/mean_terminated_length": 572.96923828125, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.9049562682215744, "grad_norm": 0.13662846386432648, "learning_rate": 1e-06, "loss": 0.0088, "num_tokens": 56520084.0, "reward": 0.559151828289032, "reward_std": 0.22278834879398346, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 97 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 695.6563110351562, "completions/mean_terminated_length": 637.7616577148438, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.9142857142857143, "grad_norm": 0.10658743232488632, "learning_rate": 1e-06, "loss": 0.0127, "num_tokens": 57167880.0, "reward": 0.4966517984867096, "reward_std": 0.17559011280536652, "rewards/verify_math_reward/mean": 0.4966517984867096, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 98 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3235.0, "completions/mean_length": 671.685302734375, "completions/mean_terminated_length": 581.468505859375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.9236151603498542, "grad_norm": 0.12809064984321594, "learning_rate": 1e-06, "loss": 0.0032, "num_tokens": 57774718.0, "reward": 0.5022321939468384, "reward_std": 0.21432848274707794, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002743005752563, "step": 99 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3224.0, "completions/mean_length": 675.546875, "completions/mean_terminated_length": 577.370849609375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.9329446064139941, "grad_norm": 0.12401802837848663, "learning_rate": 1e-06, "loss": -0.0041, "num_tokens": 58367872.0, "reward": 0.5792410969734192, "reward_std": 0.19261088967323303, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3526.0, "completions/mean_length": 658.6004638671875, "completions/mean_terminated_length": 580.1209716796875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.9422740524781341, "grad_norm": 0.10858191549777985, "learning_rate": 1e-06, "loss": 0.0103, "num_tokens": 58961578.0, "reward": 0.566964328289032, "reward_std": 0.17187067866325378, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3564.0, "completions/mean_length": 645.5279541015625, "completions/mean_terminated_length": 582.7920532226562, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.9516034985422741, "grad_norm": 0.11887253820896149, "learning_rate": 1e-06, "loss": 0.0105, "num_tokens": 59574347.0, "reward": 0.5412946939468384, "reward_std": 0.17633940279483795, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4019.0, "completions/mean_length": 734.6295166015625, "completions/mean_terminated_length": 650.0182495117188, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.960932944606414, "grad_norm": 0.11316835135221481, "learning_rate": 1e-06, "loss": 0.0076, "num_tokens": 60229927.0, "reward": 0.5256696939468384, "reward_std": 0.17341090738773346, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195137500763, "step": 103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 1838.0, "completions/mean_length": 612.4420166015625, "completions/mean_terminated_length": 557.1473999023438, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.970262390670554, "grad_norm": 0.14979751408100128, "learning_rate": 1e-06, "loss": -0.0068, "num_tokens": 60814275.0, "reward": 0.582589328289032, "reward_std": 0.23582008481025696, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.493407279253006, "step": 104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4081.0, "completions/mean_length": 603.3671875, "completions/mean_terminated_length": 543.9012451171875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.9795918367346939, "grad_norm": 0.12644144892692566, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 61381348.0, "reward": 0.527901828289032, "reward_std": 0.20850147306919098, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 1928.0, "completions/mean_length": 615.6998291015625, "completions/mean_terminated_length": 552.4215698242188, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.9889212827988338, "grad_norm": 0.12566089630126953, "learning_rate": 1e-06, "loss": -0.0041, "num_tokens": 61949671.0, "reward": 0.5758928656578064, "reward_std": 0.18565760552883148, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008522727272727293, "completions/max_length": 4096.0, "completions/max_terminated_length": 3541.0, "completions/mean_length": 548.3125, "completions/mean_terminated_length": 517.816650390625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.9982507288629737, "grad_norm": 0.13114283978939056, "learning_rate": 1e-06, "loss": 0.0255, "num_tokens": 62530659.0, "reward": 0.559151828289032, "reward_std": 0.2181578129529953, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3522.0, "completions/mean_length": 603.1596069335938, "completions/mean_terminated_length": 583.5589599609375, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 1.00932944606414, "grad_norm": 0.1275864988565445, "learning_rate": 1e-06, "loss": 0.024, "num_tokens": 63141034.0, "reward": 0.5658482313156128, "reward_std": 0.2159428894519806, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3996.0, "completions/mean_length": 634.6194458007812, "completions/mean_terminated_length": 563.6572265625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 1.01865889212828, "grad_norm": 0.13994581997394562, "learning_rate": 1e-06, "loss": 0.0099, "num_tokens": 63732373.0, "reward": 0.5725446939468384, "reward_std": 0.21012048423290253, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2561.0, "completions/mean_length": 627.6272583007812, "completions/mean_terminated_length": 576.56396484375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 1.0279883381924197, "grad_norm": 0.12112820893526077, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 64334479.0, "reward": 0.5368303656578064, "reward_std": 0.2135361284017563, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2995.0, "completions/mean_length": 633.6339721679688, "completions/mean_terminated_length": 558.62255859375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 1.0373177842565597, "grad_norm": 0.11971090734004974, "learning_rate": 1e-06, "loss": -0.0065, "num_tokens": 64928767.0, "reward": 0.5133928656578064, "reward_std": 0.17792311310768127, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4014.0, "completions/mean_length": 585.0714721679688, "completions/mean_terminated_length": 549.4475708007812, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 1.0466472303206997, "grad_norm": 0.14180590212345123, "learning_rate": 1e-06, "loss": -0.0017, "num_tokens": 65510703.0, "reward": 0.598214328289032, "reward_std": 0.2285270094871521, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3056.0, "completions/mean_length": 645.7053833007812, "completions/mean_terminated_length": 578.97607421875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 1.0559766763848397, "grad_norm": 0.11551226675510406, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 66110255.0, "reward": 0.5424107313156128, "reward_std": 0.19425876438617706, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3007.0, "completions/mean_length": 666.7623291015625, "completions/mean_terminated_length": 564.279296875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 1.0653061224489795, "grad_norm": 0.12090034782886505, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 66712626.0, "reward": 0.5234375, "reward_std": 0.17652484774589539, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 633.575927734375, "completions/mean_terminated_length": 558.5632934570312, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 1.0746355685131195, "grad_norm": 0.12432066351175308, "learning_rate": 1e-06, "loss": 0.022, "num_tokens": 67290398.0, "reward": 0.5636160969734192, "reward_std": 0.1777704507112503, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3250.0, "completions/mean_length": 621.9553833007812, "completions/mean_terminated_length": 562.805908203125, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 1.0839650145772595, "grad_norm": 0.1291705071926117, "learning_rate": 1e-06, "loss": 0.0068, "num_tokens": 67874070.0, "reward": 0.6439732313156128, "reward_std": 0.19227458536624908, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909069061279297, "step": 116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3373.0, "completions/mean_length": 586.6328125, "completions/mean_terminated_length": 559.0, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 1.0932944606413995, "grad_norm": 0.13343921303749084, "learning_rate": 1e-06, "loss": 0.0146, "num_tokens": 68456141.0, "reward": 0.5770089626312256, "reward_std": 0.21312315762043, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2638.0, "completions/mean_length": 611.771240234375, "completions/mean_terminated_length": 560.4744873046875, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 1.1026239067055394, "grad_norm": 0.1323866993188858, "learning_rate": 1e-06, "loss": 0.0177, "num_tokens": 69025392.0, "reward": 0.6238839626312256, "reward_std": 0.21440446376800537, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.48468026518821716, "step": 118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3523.0, "completions/mean_length": 611.7801513671875, "completions/mean_terminated_length": 548.4306640625, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 1.1119533527696792, "grad_norm": 0.13896460831165314, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 69581635.0, "reward": 0.6383928656578064, "reward_std": 0.20760175585746765, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2819.0, "completions/mean_length": 638.6317138671875, "completions/mean_terminated_length": 575.7704467773438, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 1.1212827988338192, "grad_norm": 0.13054411113262177, "learning_rate": 1e-06, "loss": 0.0059, "num_tokens": 70184953.0, "reward": 0.5703125, "reward_std": 0.2216501384973526, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3805.0, "completions/mean_length": 637.3147583007812, "completions/mean_terminated_length": 582.4149780273438, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 1.1306122448979592, "grad_norm": 0.13472609221935272, "learning_rate": 1e-06, "loss": 0.0215, "num_tokens": 70795843.0, "reward": 0.5345982313156128, "reward_std": 0.2102688103914261, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2207.0, "completions/mean_length": 612.4185791015625, "completions/mean_terminated_length": 561.13134765625, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 1.1399416909620992, "grad_norm": 0.11735182255506516, "learning_rate": 1e-06, "loss": 0.011, "num_tokens": 71369890.0, "reward": 0.625, "reward_std": 0.1767519861459732, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2966.0, "completions/mean_length": 681.1607666015625, "completions/mean_terminated_length": 579.1080322265625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 1.149271137026239, "grad_norm": 0.13733011484146118, "learning_rate": 1e-06, "loss": 0.0132, "num_tokens": 71963802.0, "reward": 0.5881696939468384, "reward_std": 0.23228992521762848, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3791.0, "completions/mean_length": 597.818115234375, "completions/mean_terminated_length": 566.3029174804688, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 1.158600583090379, "grad_norm": 0.12670235335826874, "learning_rate": 1e-06, "loss": 0.0112, "num_tokens": 72556191.0, "reward": 0.6104910969734192, "reward_std": 0.17949683964252472, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791128396987915, "step": 124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2245.0, "completions/mean_length": 664.1272583007812, "completions/mean_terminated_length": 585.77392578125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 1.167930029154519, "grad_norm": 0.10632211714982986, "learning_rate": 1e-06, "loss": -0.007, "num_tokens": 73159673.0, "reward": 0.4854910969734192, "reward_std": 0.1410633772611618, "rewards/verify_math_reward/mean": 0.4854910671710968, "rewards/verify_math_reward/std": 0.5000686049461365, "step": 125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2799.0, "completions/mean_length": 583.8627319335938, "completions/mean_terminated_length": 552.2218627929688, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 1.177259475218659, "grad_norm": 0.1307123452425003, "learning_rate": 1e-06, "loss": 0.0067, "num_tokens": 73738526.0, "reward": 0.598214328289032, "reward_std": 0.18712298572063446, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2390.0, "completions/mean_length": 627.5111694335938, "completions/mean_terminated_length": 552.3671264648438, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 1.186588921282799, "grad_norm": 0.126788929104805, "learning_rate": 1e-06, "loss": 0.008, "num_tokens": 74305552.0, "reward": 0.6238839626312256, "reward_std": 0.18539589643478394, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.48468026518821716, "step": 127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2035.0, "completions/mean_length": 620.9576416015625, "completions/mean_terminated_length": 565.7982177734375, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 1.1959183673469387, "grad_norm": 0.13120092451572418, "learning_rate": 1e-06, "loss": 0.0231, "num_tokens": 74896506.0, "reward": 0.5558035969734192, "reward_std": 0.20354419946670532, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2196.0, "completions/mean_length": 628.6295166015625, "completions/mean_terminated_length": 565.5863647460938, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 1.2052478134110787, "grad_norm": 0.13632048666477203, "learning_rate": 1e-06, "loss": 0.0155, "num_tokens": 75487310.0, "reward": 0.559151828289032, "reward_std": 0.2307412326335907, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3733.0, "completions/mean_length": 681.984375, "completions/mean_terminated_length": 604.0387573242188, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 1.2145772594752187, "grad_norm": 0.14439013600349426, "learning_rate": 1e-06, "loss": 0.007, "num_tokens": 76109592.0, "reward": 0.5167410969734192, "reward_std": 0.23601117730140686, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3954.0, "completions/mean_length": 575.3717041015625, "completions/mean_terminated_length": 527.580322265625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 1.2239067055393587, "grad_norm": 0.12474658340215683, "learning_rate": 1e-06, "loss": 0.0298, "num_tokens": 76661125.0, "reward": 0.625, "reward_std": 0.1924593597650528, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3872.0, "completions/mean_length": 568.6439819335938, "completions/mean_terminated_length": 528.8318481445312, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 1.2332361516034984, "grad_norm": 0.13131001591682434, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 77215166.0, "reward": 0.598214328289032, "reward_std": 0.17836818099021912, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053290486335754, "step": 132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3578.0, "completions/mean_length": 632.4017944335938, "completions/mean_terminated_length": 553.3241577148438, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 1.2425655976676384, "grad_norm": 0.11237182468175888, "learning_rate": 1e-06, "loss": 0.0118, "num_tokens": 77790750.0, "reward": 0.5524553656578064, "reward_std": 0.16529689729213715, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3082.0, "completions/mean_length": 606.552490234375, "completions/mean_terminated_length": 559.1843872070312, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 1.2518950437317784, "grad_norm": 0.13661354780197144, "learning_rate": 1e-06, "loss": 0.0068, "num_tokens": 78366333.0, "reward": 0.6049107313156128, "reward_std": 0.23499269783496857, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2681.0, "completions/mean_length": 581.9185791015625, "completions/mean_terminated_length": 522.08740234375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 1.2612244897959184, "grad_norm": 0.14530333876609802, "learning_rate": 1e-06, "loss": -0.0065, "num_tokens": 78910308.0, "reward": 0.6540178656578064, "reward_std": 0.2191763073205948, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3982.0, "completions/mean_length": 599.6875, "completions/mean_terminated_length": 544.1904907226562, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 1.2705539358600584, "grad_norm": 0.1232718899846077, "learning_rate": 1e-06, "loss": 0.0086, "num_tokens": 79481884.0, "reward": 0.5915178656578064, "reward_std": 0.18761266767978668, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2723.0, "completions/mean_length": 619.9185791015625, "completions/mean_terminated_length": 548.6549072265625, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 1.2798833819241984, "grad_norm": 0.12724876403808594, "learning_rate": 1e-06, "loss": 0.0119, "num_tokens": 80049827.0, "reward": 0.5870535969734192, "reward_std": 0.17885534465312958, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3823.0, "completions/mean_length": 608.0346069335938, "completions/mean_terminated_length": 564.681396484375, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 1.2892128279883381, "grad_norm": 0.12700584530830383, "learning_rate": 1e-06, "loss": 0.0174, "num_tokens": 80641882.0, "reward": 0.5703125, "reward_std": 0.1734876036643982, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3850.0, "completions/mean_length": 638.7388916015625, "completions/mean_terminated_length": 579.8751831054688, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 1.2985422740524781, "grad_norm": 0.1467692255973816, "learning_rate": 1e-06, "loss": 0.0294, "num_tokens": 81248536.0, "reward": 0.5223214626312256, "reward_std": 0.2576557397842407, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2631.0, "completions/mean_length": 651.279052734375, "completions/mean_terminated_length": 576.64990234375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 1.3078717201166181, "grad_norm": 0.13139992952346802, "learning_rate": 1e-06, "loss": -0.0033, "num_tokens": 81847314.0, "reward": 0.5267857313156128, "reward_std": 0.19381622970104218, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3093.0, "completions/mean_length": 652.3225708007812, "completions/mean_terminated_length": 581.7232666015625, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 1.3172011661807579, "grad_norm": 0.13410133123397827, "learning_rate": 1e-06, "loss": 0.0038, "num_tokens": 82451995.0, "reward": 0.5267857313156128, "reward_std": 0.21913281083106995, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608329772949, "step": 141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3262.0, "completions/mean_length": 647.2578125, "completions/mean_terminated_length": 584.5534057617188, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 1.3265306122448979, "grad_norm": 0.1401214450597763, "learning_rate": 1e-06, "loss": 0.0033, "num_tokens": 83059810.0, "reward": 0.5412946939468384, "reward_std": 0.21963205933570862, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2717.0, "completions/mean_length": 675.0245971679688, "completions/mean_terminated_length": 572.7885131835938, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 1.3358600583090379, "grad_norm": 0.12698782980442047, "learning_rate": 1e-06, "loss": -0.0047, "num_tokens": 83649704.0, "reward": 0.5111607313156128, "reward_std": 0.1937370002269745, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4035.0, "completions/mean_length": 700.0234985351562, "completions/mean_terminated_length": 618.52001953125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 1.3451895043731779, "grad_norm": 0.11701101064682007, "learning_rate": 1e-06, "loss": -0.0078, "num_tokens": 84287125.0, "reward": 0.5234375, "reward_std": 0.2080129235982895, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3662.0, "completions/mean_length": 635.4364013671875, "completions/mean_terminated_length": 540.1914672851562, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 1.3545189504373178, "grad_norm": 0.13555040955543518, "learning_rate": 1e-06, "loss": 0.0045, "num_tokens": 84844828.0, "reward": 0.629464328289032, "reward_std": 0.2072654515504837, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2974.0, "completions/mean_length": 657.4631958007812, "completions/mean_terminated_length": 570.9096069335938, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 1.3638483965014578, "grad_norm": 0.1325100064277649, "learning_rate": 1e-06, "loss": 0.0042, "num_tokens": 85447187.0, "reward": 0.5625, "reward_std": 0.20921938121318817, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3577.0, "completions/mean_length": 678.9074096679688, "completions/mean_terminated_length": 576.787353515625, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 1.3731778425655976, "grad_norm": 0.13249272108078003, "learning_rate": 1e-06, "loss": 0.0106, "num_tokens": 86036600.0, "reward": 0.5390625, "reward_std": 0.2029387205839157, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3334.0, "completions/mean_length": 663.9330444335938, "completions/mean_terminated_length": 589.5780639648438, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 1.3825072886297376, "grad_norm": 0.1341022551059723, "learning_rate": 1e-06, "loss": 0.0189, "num_tokens": 86637108.0, "reward": 0.5636160969734192, "reward_std": 0.24502448737621307, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2786.0, "completions/mean_length": 613.3303833007812, "completions/mean_terminated_length": 533.8173217773438, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 1.3918367346938776, "grad_norm": 0.1319631189107895, "learning_rate": 1e-06, "loss": 0.0151, "num_tokens": 87189356.0, "reward": 0.582589328289032, "reward_std": 0.1983163207769394, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2903.0, "completions/mean_length": 597.7578125, "completions/mean_terminated_length": 550.2703857421875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 1.4011661807580174, "grad_norm": 0.11776057630777359, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 87772051.0, "reward": 0.6361607313156128, "reward_std": 0.16224895417690277, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3784.0, "completions/mean_length": 643.935302734375, "completions/mean_terminated_length": 569.1470947265625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 1.4104956268221573, "grad_norm": 0.125766783952713, "learning_rate": 1e-06, "loss": 0.0211, "num_tokens": 88368393.0, "reward": 0.5558035969734192, "reward_std": 0.1846805065870285, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2478.0, "completions/mean_length": 611.4464721679688, "completions/mean_terminated_length": 531.890380859375, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 1.4198250728862973, "grad_norm": 0.14089730381965637, "learning_rate": 1e-06, "loss": 0.0136, "num_tokens": 88918809.0, "reward": 0.6551339626312256, "reward_std": 0.20252712070941925, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2443.0, "completions/mean_length": 591.8225708007812, "completions/mean_terminated_length": 544.2545776367188, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 1.4291545189504373, "grad_norm": 0.12657928466796875, "learning_rate": 1e-06, "loss": -0.0097, "num_tokens": 89489154.0, "reward": 0.5814732313156128, "reward_std": 0.18768639862537384, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2491.0, "completions/mean_length": 605.3995971679688, "completions/mean_terminated_length": 541.93408203125, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 1.4384839650145773, "grad_norm": 0.11196082085371017, "learning_rate": 1e-06, "loss": 0.0126, "num_tokens": 90048584.0, "reward": 0.613839328289032, "reward_std": 0.1641671359539032, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 565.0457763671875, "completions/mean_terminated_length": 521.158203125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 1.4478134110787173, "grad_norm": 0.1285495012998581, "learning_rate": 1e-06, "loss": 0.006, "num_tokens": 90594137.0, "reward": 0.6417410969734192, "reward_std": 0.19358547031879425, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975656390190125, "step": 155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4058.0, "completions/mean_length": 587.7210083007812, "completions/mean_terminated_length": 540.0972900390625, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 1.457142857142857, "grad_norm": 0.1286974847316742, "learning_rate": 1e-06, "loss": 0.0104, "num_tokens": 91160887.0, "reward": 0.5948660969734192, "reward_std": 0.20106007158756256, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3553.0, "completions/mean_length": 668.7020263671875, "completions/mean_terminated_length": 594.4503784179688, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 1.466472303206997, "grad_norm": 0.1116761788725853, "learning_rate": 1e-06, "loss": -0.0036, "num_tokens": 91773724.0, "reward": 0.5412946939468384, "reward_std": 0.18829120695590973, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3258.0, "completions/mean_length": 657.099365234375, "completions/mean_terminated_length": 578.5855712890625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 1.475801749271137, "grad_norm": 0.12376651912927628, "learning_rate": 1e-06, "loss": 0.008, "num_tokens": 92371421.0, "reward": 0.5546875, "reward_std": 0.1896056979894638, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3460.0, "completions/mean_length": 658.6517944335938, "completions/mean_terminated_length": 584.1824340820312, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 1.485131195335277, "grad_norm": 0.12960033118724823, "learning_rate": 1e-06, "loss": -0.0033, "num_tokens": 92962293.0, "reward": 0.598214328289032, "reward_std": 0.18077604472637177, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3950.0, "completions/mean_length": 625.9453125, "completions/mean_terminated_length": 582.814697265625, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 1.4944606413994168, "grad_norm": 0.11636026948690414, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 93568188.0, "reward": 0.6116071939468384, "reward_std": 0.19178421795368195, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.4876568913459778, "step": 160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3769.0, "completions/mean_length": 677.3683471679688, "completions/mean_terminated_length": 583.2775268554688, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 1.5037900874635568, "grad_norm": 0.12696631252765656, "learning_rate": 1e-06, "loss": -0.0043, "num_tokens": 94159198.0, "reward": 0.606026828289032, "reward_std": 0.20361904799938202, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890191316604614, "step": 161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3322.0, "completions/mean_length": 651.8348388671875, "completions/mean_terminated_length": 577.2177734375, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 1.5131195335276968, "grad_norm": 0.12342068552970886, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 94760858.0, "reward": 0.543526828289032, "reward_std": 0.19497555494308472, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2867.0, "completions/mean_length": 660.2600708007812, "completions/mean_terminated_length": 577.8023071289062, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 1.5224489795918368, "grad_norm": 0.12338287383317947, "learning_rate": 1e-06, "loss": 0.0085, "num_tokens": 95368051.0, "reward": 0.5078125, "reward_std": 0.2057993859052658, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2217.0, "completions/mean_length": 606.5949096679688, "completions/mean_terminated_length": 539.1091918945312, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 1.5317784256559768, "grad_norm": 0.11960810422897339, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 95935416.0, "reward": 0.5625, "reward_std": 0.1624344140291214, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2296.0, "completions/mean_length": 649.0111694335938, "completions/mean_terminated_length": 594.2970581054688, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 1.5411078717201168, "grad_norm": 0.12979981303215027, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 96551474.0, "reward": 0.5491071939468384, "reward_std": 0.1957344114780426, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 561.068115234375, "completions/mean_terminated_length": 521.1704711914062, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 1.5504373177842565, "grad_norm": 0.12584321200847626, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 97106791.0, "reward": 0.5792410969734192, "reward_std": 0.16904886066913605, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2662.0, "completions/mean_length": 604.646240234375, "completions/mean_terminated_length": 557.2522583007812, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 1.5597667638483965, "grad_norm": 0.13442009687423706, "learning_rate": 1e-06, "loss": 0.0044, "num_tokens": 97682306.0, "reward": 0.6082589626312256, "reward_std": 0.22616049647331238, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.4884119927883148, "step": 167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4054.0, "completions/mean_length": 690.2366333007812, "completions/mean_terminated_length": 612.4794311523438, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 1.5690962099125363, "grad_norm": 0.10918771475553513, "learning_rate": 1e-06, "loss": -0.0092, "num_tokens": 98303182.0, "reward": 0.5736607313156128, "reward_std": 0.16732457280158997, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3861.0, "completions/mean_length": 645.9799194335938, "completions/mean_terminated_length": 559.1372680664062, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 1.5784256559766763, "grad_norm": 0.13866642117500305, "learning_rate": 1e-06, "loss": -0.0091, "num_tokens": 98879668.0, "reward": 0.6071428656578064, "reward_std": 0.20290479063987732, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3954.0, "completions/mean_length": 650.765625, "completions/mean_terminated_length": 596.079345703125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 1.5877551020408163, "grad_norm": 0.12869887053966522, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 99494578.0, "reward": 0.609375, "reward_std": 0.2069612294435501, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3807.0, "completions/mean_length": 655.984375, "completions/mean_terminated_length": 573.4240112304688, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 1.5970845481049563, "grad_norm": 0.1265682876110077, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 100087412.0, "reward": 0.5602678656578064, "reward_std": 0.20218871533870697, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3613.0, "completions/mean_length": 645.5424194335938, "completions/mean_terminated_length": 590.7732543945312, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 1.6064139941690962, "grad_norm": 0.1304820328950882, "learning_rate": 1e-06, "loss": 0.0132, "num_tokens": 100700226.0, "reward": 0.6194196939468384, "reward_std": 0.22792786359786987, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3721.0, "completions/mean_length": 650.1428833007812, "completions/mean_terminated_length": 555.302734375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 1.6157434402332362, "grad_norm": 0.13903263211250305, "learning_rate": 1e-06, "loss": -0.0067, "num_tokens": 101264898.0, "reward": 0.6127232313156128, "reward_std": 0.20933659374713898, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3639.0, "completions/mean_length": 602.0435791015625, "completions/mean_terminated_length": 546.5839233398438, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 1.6250728862973762, "grad_norm": 0.1289028376340866, "learning_rate": 1e-06, "loss": 0.0141, "num_tokens": 101837297.0, "reward": 0.59375, "reward_std": 0.18400652706623077, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3603.0, "completions/mean_length": 646.989990234375, "completions/mean_terminated_length": 592.2437744140625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 1.634402332361516, "grad_norm": 0.11323468387126923, "learning_rate": 1e-06, "loss": 0.0092, "num_tokens": 102468024.0, "reward": 0.5491071939468384, "reward_std": 0.17284639179706573, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2894.0, "completions/mean_length": 644.4330444335938, "completions/mean_terminated_length": 601.5322265625, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 1.643731778425656, "grad_norm": 0.1305655539035797, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 103090684.0, "reward": 0.5814732313156128, "reward_std": 0.21876651048660278, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2166.0, "completions/mean_length": 627.2902221679688, "completions/mean_terminated_length": 568.2315673828125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 1.6530612244897958, "grad_norm": 0.14653432369232178, "learning_rate": 1e-06, "loss": 0.0105, "num_tokens": 103681912.0, "reward": 0.551339328289032, "reward_std": 0.19617947936058044, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2545.0, "completions/mean_length": 616.9732666015625, "completions/mean_terminated_length": 545.6492309570312, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 1.6623906705539357, "grad_norm": 0.1253194659948349, "learning_rate": 1e-06, "loss": 0.0071, "num_tokens": 104244448.0, "reward": 0.6316964626312256, "reward_std": 0.19009242951869965, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2242.0, "completions/mean_length": 634.7545166015625, "completions/mean_terminated_length": 539.4907836914062, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 1.6717201166180757, "grad_norm": 0.12943314015865326, "learning_rate": 1e-06, "loss": -0.0054, "num_tokens": 104797068.0, "reward": 0.5881696939468384, "reward_std": 0.19813157618045807, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3817.0, "completions/mean_length": 629.8092041015625, "completions/mean_terminated_length": 534.4093627929688, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 1.6810495626822157, "grad_norm": 0.12720946967601776, "learning_rate": 1e-06, "loss": -0.0101, "num_tokens": 105349977.0, "reward": 0.6049107313156128, "reward_std": 0.18771639466285706, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4078.0, "completions/mean_length": 647.8828125, "completions/mean_terminated_length": 557.0390014648438, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 1.6903790087463557, "grad_norm": 0.13440246880054474, "learning_rate": 1e-06, "loss": 0.0071, "num_tokens": 105919120.0, "reward": 0.6339285969734192, "reward_std": 0.19730742275714874, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199838399887085, "step": 181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3583.0, "completions/mean_length": 728.0826416015625, "completions/mean_terminated_length": 619.4400634765625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 1.6997084548104957, "grad_norm": 0.11702921986579895, "learning_rate": 1e-06, "loss": 0.0027, "num_tokens": 106561298.0, "reward": 0.5055803656578064, "reward_std": 0.17870266735553741, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3089.0, "completions/mean_length": 538.7433471679688, "completions/mean_terminated_length": 498.59368896484375, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 1.7090379008746357, "grad_norm": 0.15399344265460968, "learning_rate": 1e-06, "loss": 0.0199, "num_tokens": 107098828.0, "reward": 0.637276828289032, "reward_std": 0.20012785494327545, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 675.8214721679688, "completions/mean_terminated_length": 589.72998046875, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 1.7183673469387755, "grad_norm": 0.13055773079395294, "learning_rate": 1e-06, "loss": -0.0042, "num_tokens": 107707100.0, "reward": 0.5870535969734192, "reward_std": 0.1916026771068573, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2774.0, "completions/mean_length": 665.2221069335938, "completions/mean_terminated_length": 594.8872680664062, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 1.7276967930029155, "grad_norm": 0.10972083359956741, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 108315195.0, "reward": 0.5602678656578064, "reward_std": 0.1711532026529312, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317117214203, "step": 185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2784.0, "completions/mean_length": 629.7578125, "completions/mean_terminated_length": 570.7412109375, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 1.7370262390670554, "grad_norm": 0.13642588257789612, "learning_rate": 1e-06, "loss": 0.0118, "num_tokens": 108909866.0, "reward": 0.5323660969734192, "reward_std": 0.21319982409477234, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2607.0, "completions/mean_length": 567.2142944335938, "completions/mean_terminated_length": 519.312255859375, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 1.7463556851311952, "grad_norm": 0.13879670202732086, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 109456682.0, "reward": 0.6662946939468384, "reward_std": 0.18118861317634583, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179922461509705, "step": 187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3396.0, "completions/mean_length": 672.8705444335938, "completions/mean_terminated_length": 606.6666259765625, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 1.7556851311953352, "grad_norm": 0.12180299311876297, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 110080958.0, "reward": 0.5870535969734192, "reward_std": 0.1836727410554886, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3343.0, "completions/mean_length": 677.661865234375, "completions/mean_terminated_length": 571.453369140625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 1.7650145772594752, "grad_norm": 0.15158618986606598, "learning_rate": 1e-06, "loss": -0.0238, "num_tokens": 110674239.0, "reward": 0.609375, "reward_std": 0.20200437307357788, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3283.0, "completions/mean_length": 710.3270263671875, "completions/mean_terminated_length": 605.1334838867188, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 1.7743440233236152, "grad_norm": 0.12813588976860046, "learning_rate": 1e-06, "loss": 0.0102, "num_tokens": 111284884.0, "reward": 0.5814732313156128, "reward_std": 0.2077547013759613, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3707.0, "completions/mean_length": 588.7355346679688, "completions/mean_terminated_length": 537.0996704101562, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 1.7836734693877552, "grad_norm": 0.12251389771699905, "learning_rate": 1e-06, "loss": -0.0182, "num_tokens": 111849495.0, "reward": 0.6238839626312256, "reward_std": 0.14789608120918274, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.48468026518821716, "step": 191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2250.0, "completions/mean_length": 640.328125, "completions/mean_terminated_length": 565.4617919921875, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 1.7930029154518952, "grad_norm": 0.14359082281589508, "learning_rate": 1e-06, "loss": 0.0146, "num_tokens": 112429053.0, "reward": 0.6160714626312256, "reward_std": 0.21830935776233673, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4041.0, "completions/mean_length": 700.6975708007812, "completions/mean_terminated_length": 607.2488403320312, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 1.802332361516035, "grad_norm": 0.1354796439409256, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 113051070.0, "reward": 0.5502232313156128, "reward_std": 0.16180570423603058, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4077.0, "completions/mean_length": 636.5826416015625, "completions/mean_terminated_length": 565.66064453125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 1.811661807580175, "grad_norm": 0.13040165603160858, "learning_rate": 1e-06, "loss": 0.0081, "num_tokens": 113633176.0, "reward": 0.6116071939468384, "reward_std": 0.18179410696029663, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3589.0, "completions/mean_length": 594.7522583007812, "completions/mean_terminated_length": 547.2239990234375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 1.820991253644315, "grad_norm": 0.13067464530467987, "learning_rate": 1e-06, "loss": -0.0096, "num_tokens": 114201306.0, "reward": 0.5926339626312256, "reward_std": 0.1868947446346283, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3883.0, "completions/mean_length": 653.5803833007812, "completions/mean_terminated_length": 579.0010986328125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 1.8303206997084547, "grad_norm": 0.12206802517175674, "learning_rate": 1e-06, "loss": -0.0098, "num_tokens": 114791962.0, "reward": 0.6316964626312256, "reward_std": 0.19723325967788696, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2613.0, "completions/mean_length": 739.4866333007812, "completions/mean_terminated_length": 635.1990966796875, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 1.8396501457725947, "grad_norm": 0.12905940413475037, "learning_rate": 1e-06, "loss": -0.0063, "num_tokens": 115444758.0, "reward": 0.4520089626312256, "reward_std": 0.2306734323501587, "rewards/verify_math_reward/mean": 0.4520089328289032, "rewards/verify_math_reward/std": 0.49796947836875916, "step": 197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3602.0, "completions/mean_length": 688.4989013671875, "completions/mean_terminated_length": 566.3803100585938, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 1.8489795918367347, "grad_norm": 0.14780296385288239, "learning_rate": 1e-06, "loss": 0.0027, "num_tokens": 116030213.0, "reward": 0.5234375, "reward_std": 0.19797931611537933, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3771.0, "completions/mean_length": 653.6272583007812, "completions/mean_terminated_length": 550.751708984375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 1.8583090379008746, "grad_norm": 0.12445133179426193, "learning_rate": 1e-06, "loss": 0.0057, "num_tokens": 116589935.0, "reward": 0.566964328289032, "reward_std": 0.17103557288646698, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3527.0, "completions/mean_length": 665.0982666015625, "completions/mean_terminated_length": 578.73681640625, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 1.8676384839650146, "grad_norm": 0.11815723031759262, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 117187143.0, "reward": 0.637276828289032, "reward_std": 0.15263791382312775, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3769.0, "completions/mean_length": 669.4285888671875, "completions/mean_terminated_length": 558.8939819335938, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 1.8769679300291546, "grad_norm": 0.14831843972206116, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 117752879.0, "reward": 0.5814732313156128, "reward_std": 0.2200452983379364, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4032.0, "completions/mean_length": 737.7188110351562, "completions/mean_terminated_length": 629.3870849609375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 1.8862973760932946, "grad_norm": 0.12336394190788269, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 118379267.0, "reward": 0.5680803656578064, "reward_std": 0.20985834300518036, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2685.0, "completions/mean_length": 672.3348388671875, "completions/mean_terminated_length": 578.10546875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 1.8956268221574344, "grad_norm": 0.1384689062833786, "learning_rate": 1e-06, "loss": 0.0078, "num_tokens": 118975167.0, "reward": 0.5546875, "reward_std": 0.21891483664512634, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3466.0, "completions/mean_length": 637.8761596679688, "completions/mean_terminated_length": 598.8453979492188, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 1.9049562682215744, "grad_norm": 0.137050598859787, "learning_rate": 1e-06, "loss": 0.0073, "num_tokens": 119602696.0, "reward": 0.6116071939468384, "reward_std": 0.21756118535995483, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2670.0, "completions/mean_length": 690.0234985351562, "completions/mean_terminated_length": 596.2809448242188, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 1.9142857142857141, "grad_norm": 0.11969118565320969, "learning_rate": 1e-06, "loss": 0.0068, "num_tokens": 120208469.0, "reward": 0.5234375, "reward_std": 0.1738969385623932, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3551.0, "completions/mean_length": 695.3069458007812, "completions/mean_terminated_length": 637.4063720703125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 1.9236151603498541, "grad_norm": 0.12369006127119064, "learning_rate": 1e-06, "loss": 0.0218, "num_tokens": 120849648.0, "reward": 0.5647321939468384, "reward_std": 0.18701057136058807, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 658.7913208007812, "completions/mean_terminated_length": 588.3246459960938, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 1.9329446064139941, "grad_norm": 0.13501164317131042, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 121459645.0, "reward": 0.613839328289032, "reward_std": 0.18873807787895203, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4045.0, "completions/mean_length": 699.2801513671875, "completions/mean_terminated_length": 641.447265625, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 1.9422740524781341, "grad_norm": 0.13782964646816254, "learning_rate": 1e-06, "loss": 0.0111, "num_tokens": 122115752.0, "reward": 0.5457589626312256, "reward_std": 0.2335277497768402, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4076.0, "completions/mean_length": 584.8817138671875, "completions/mean_terminated_length": 525.10107421875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 1.951603498542274, "grad_norm": 0.1312982738018036, "learning_rate": 1e-06, "loss": 0.01, "num_tokens": 122663590.0, "reward": 0.6595982313156128, "reward_std": 0.17908497154712677, "rewards/verify_math_reward/mean": 0.6595982313156128, "rewards/verify_math_reward/std": 0.4741089344024658, "step": 209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3579.0, "completions/mean_length": 621.958740234375, "completions/mean_terminated_length": 550.7369384765625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 1.960932944606414, "grad_norm": 0.1384764015674591, "learning_rate": 1e-06, "loss": 0.0076, "num_tokens": 123229761.0, "reward": 0.6808035969734192, "reward_std": 0.16187915205955505, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.4664256274700165, "step": 210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2659.0, "completions/mean_length": 607.6451416015625, "completions/mean_terminated_length": 568.2731323242188, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 1.970262390670554, "grad_norm": 0.13262683153152466, "learning_rate": 1e-06, "loss": 0.0041, "num_tokens": 123823171.0, "reward": 0.6149553656578064, "reward_std": 0.20038609206676483, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3672.0, "completions/mean_length": 634.1652221679688, "completions/mean_terminated_length": 534.8013916015625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 1.9795918367346939, "grad_norm": 0.12873639166355133, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 124373519.0, "reward": 0.6171875, "reward_std": 0.18344198167324066, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2938.0, "completions/mean_length": 606.8125, "completions/mean_terminated_length": 531.2200317382812, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 1.9889212827988338, "grad_norm": 0.12236711382865906, "learning_rate": 1e-06, "loss": 0.0205, "num_tokens": 124932151.0, "reward": 0.6316964626312256, "reward_std": 0.16930918395519257, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014204545454545414, "completions/max_length": 4096.0, "completions/max_terminated_length": 3598.0, "completions/mean_length": 618.5653686523438, "completions/mean_terminated_length": 568.4581909179688, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 1.9982507288629736, "grad_norm": 0.11970631778240204, "learning_rate": 1e-06, "loss": 0.0103, "num_tokens": 125525642.0, "reward": 0.6484375, "reward_std": 0.1632571667432785, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3812.0, "completions/mean_length": 659.1395263671875, "completions/mean_terminated_length": 564.5469970703125, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 2.00932944606414, "grad_norm": 0.13896454870700836, "learning_rate": 1e-06, "loss": -0.0104, "num_tokens": 126106415.0, "reward": 0.606026828289032, "reward_std": 0.2027532458305359, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3344.0, "completions/mean_length": 714.7076416015625, "completions/mean_terminated_length": 585.4113159179688, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 2.01865889212828, "grad_norm": 0.1265566051006317, "learning_rate": 1e-06, "loss": -0.0014, "num_tokens": 126701305.0, "reward": 0.5725446939468384, "reward_std": 0.17724093794822693, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3856.0, "completions/mean_length": 676.4375, "completions/mean_terminated_length": 557.9769287109375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 2.02798833819242, "grad_norm": 0.1350981891155243, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 127278209.0, "reward": 0.5881696939468384, "reward_std": 0.1869703084230423, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924396276473999, "step": 217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3798.0, "completions/mean_length": 707.5971069335938, "completions/mean_terminated_length": 634.1881103515625, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 2.03731778425656, "grad_norm": 0.12518104910850525, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 127925704.0, "reward": 0.546875, "reward_std": 0.18457287549972534, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3915.0, "completions/mean_length": 621.2767944335938, "completions/mean_terminated_length": 521.5430297851562, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 2.0466472303206995, "grad_norm": 0.1590966433286667, "learning_rate": 1e-06, "loss": 0.0036, "num_tokens": 128467312.0, "reward": 0.6707589626312256, "reward_std": 0.2203381508588791, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3369.0, "completions/mean_length": 749.3795166015625, "completions/mean_terminated_length": 613.3379516601562, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 2.0559766763848395, "grad_norm": 0.13837730884552002, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 129089932.0, "reward": 0.551339328289032, "reward_std": 0.23375487327575684, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2578.0, "completions/mean_length": 677.390625, "completions/mean_terminated_length": 575.2252807617188, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 2.0653061224489795, "grad_norm": 0.11362986266613007, "learning_rate": 1e-06, "loss": -0.0022, "num_tokens": 129678290.0, "reward": 0.6183035969734192, "reward_std": 0.15895067155361176, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3582.0, "completions/mean_length": 685.7377319335938, "completions/mean_terminated_length": 607.8778076171875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 2.0746355685131195, "grad_norm": 0.13333557546138763, "learning_rate": 1e-06, "loss": 0.0065, "num_tokens": 130302439.0, "reward": 0.5926339626312256, "reward_std": 0.18701240420341492, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2646.0, "completions/mean_length": 625.0670166015625, "completions/mean_terminated_length": 541.7645874023438, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 2.0839650145772595, "grad_norm": 0.14015834033489227, "learning_rate": 1e-06, "loss": -0.0212, "num_tokens": 130877603.0, "reward": 0.6573660969734192, "reward_std": 0.16661031544208527, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2983.0, "completions/mean_length": 700.3861694335938, "completions/mean_terminated_length": 570.5422973632812, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 2.0932944606413995, "grad_norm": 0.14076173305511475, "learning_rate": 1e-06, "loss": -0.0149, "num_tokens": 131455685.0, "reward": 0.6473214626312256, "reward_std": 0.1885526329278946, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2877.0, "completions/mean_length": 722.5111694335938, "completions/mean_terminated_length": 609.6724243164062, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 2.1026239067055394, "grad_norm": 0.13184206187725067, "learning_rate": 1e-06, "loss": 0.0054, "num_tokens": 132082679.0, "reward": 0.5323660969734192, "reward_std": 0.2120707631111145, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4046.0, "completions/mean_length": 702.5938110351562, "completions/mean_terminated_length": 629.0763549804688, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 2.1119533527696794, "grad_norm": 0.1343090981245041, "learning_rate": 1e-06, "loss": 0.0128, "num_tokens": 132726227.0, "reward": 0.6004464626312256, "reward_std": 0.19554010033607483, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2959.0, "completions/mean_length": 628.0960083007812, "completions/mean_terminated_length": 552.9646606445312, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 2.1212827988338194, "grad_norm": 0.1360645443201065, "learning_rate": 1e-06, "loss": 0.0035, "num_tokens": 133293929.0, "reward": 0.6506696939468384, "reward_std": 0.18118861317634583, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 1966.0, "completions/mean_length": 652.671875, "completions/mean_terminated_length": 574.0570678710938, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 2.130612244897959, "grad_norm": 0.13314826786518097, "learning_rate": 1e-06, "loss": 0.0052, "num_tokens": 133885211.0, "reward": 0.6283482313156128, "reward_std": 0.20380564033985138, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3639.0, "completions/mean_length": 776.6741333007812, "completions/mean_terminated_length": 657.715576171875, "completions/min_length": 205.0, "completions/min_terminated_length": 205.0, "epoch": 2.139941690962099, "grad_norm": 0.11701515316963196, "learning_rate": 1e-06, "loss": 0.0059, "num_tokens": 134549071.0, "reward": 0.574776828289032, "reward_std": 0.17813995480537415, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3908.0, "completions/mean_length": 645.9732666015625, "completions/mean_terminated_length": 538.7802124023438, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 2.149271137026239, "grad_norm": 0.14322766661643982, "learning_rate": 1e-06, "loss": -0.0087, "num_tokens": 135102863.0, "reward": 0.6316964626312256, "reward_std": 0.1775447428226471, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.482613742351532, "step": 230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3102.0, "completions/mean_length": 658.2980346679688, "completions/mean_terminated_length": 567.7285766601562, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 2.158600583090379, "grad_norm": 0.13096362352371216, "learning_rate": 1e-06, "loss": -0.0072, "num_tokens": 135688226.0, "reward": 0.5948660969734192, "reward_std": 0.16340941190719604, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0435267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3176.0, "completions/mean_length": 758.7299194335938, "completions/mean_terminated_length": 606.8588256835938, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 2.167930029154519, "grad_norm": 0.12270905077457428, "learning_rate": 1e-06, "loss": -0.0064, "num_tokens": 136298680.0, "reward": 0.5323660969734192, "reward_std": 0.16604506969451904, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3398.0, "completions/mean_length": 720.1719360351562, "completions/mean_terminated_length": 615.2842407226562, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 2.177259475218659, "grad_norm": 0.12233763188123703, "learning_rate": 1e-06, "loss": 0.0078, "num_tokens": 136924626.0, "reward": 0.578125, "reward_std": 0.17878004908561707, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3202.0, "completions/mean_length": 639.75, "completions/mean_terminated_length": 556.7999877929688, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 2.186588921282799, "grad_norm": 0.13310506939888, "learning_rate": 1e-06, "loss": -0.0099, "num_tokens": 137519090.0, "reward": 0.5401785969734192, "reward_std": 0.2103782743215561, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2326.0, "completions/mean_length": 583.7109375, "completions/mean_terminated_length": 536.0328369140625, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 2.195918367346939, "grad_norm": 0.13909703493118286, "learning_rate": 1e-06, "loss": 0.0191, "num_tokens": 138080239.0, "reward": 0.6383928656578064, "reward_std": 0.189639613032341, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3518.0, "completions/mean_length": 615.6217041015625, "completions/mean_terminated_length": 556.3643798828125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 2.205247813411079, "grad_norm": 0.133534237742424, "learning_rate": 1e-06, "loss": -0.0069, "num_tokens": 138659828.0, "reward": 0.6328125, "reward_std": 0.18250201642513275, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3193.0, "completions/mean_length": 655.09375, "completions/mean_terminated_length": 584.55126953125, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 2.2145772594752184, "grad_norm": 0.13340818881988525, "learning_rate": 1e-06, "loss": 0.0142, "num_tokens": 139256472.0, "reward": 0.578125, "reward_std": 0.21609443426132202, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3982.0, "completions/mean_length": 655.091552734375, "completions/mean_terminated_length": 560.3875732421875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 2.2239067055393584, "grad_norm": 0.11444026231765747, "learning_rate": 1e-06, "loss": 0.0053, "num_tokens": 139826866.0, "reward": 0.5703125, "reward_std": 0.1689378321170807, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2182.0, "completions/mean_length": 667.599365234375, "completions/mean_terminated_length": 548.8325805664062, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 2.2332361516034984, "grad_norm": 0.14029905200004578, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 140393347.0, "reward": 0.6104910969734192, "reward_std": 0.1975356638431549, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791128396987915, "step": 239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3994.0, "completions/mean_length": 680.1283569335938, "completions/mean_terminated_length": 565.8719482421875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 2.2425655976676384, "grad_norm": 0.11632180213928223, "learning_rate": 1e-06, "loss": 0.004, "num_tokens": 140959102.0, "reward": 0.6707589626312256, "reward_std": 0.15634779632091522, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2092.0, "completions/mean_length": 658.4642944335938, "completions/mean_terminated_length": 555.7333374023438, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 2.2518950437317784, "grad_norm": 0.14235104620456696, "learning_rate": 1e-06, "loss": 0.0108, "num_tokens": 141531070.0, "reward": 0.6116071939468384, "reward_std": 0.18994523584842682, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.4876568913459778, "step": 241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3761.0, "completions/mean_length": 656.7890625, "completions/mean_terminated_length": 558.0745849609375, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 2.2612244897959184, "grad_norm": 0.13869857788085938, "learning_rate": 1e-06, "loss": -0.0119, "num_tokens": 142097121.0, "reward": 0.6674107313156128, "reward_std": 0.19182021915912628, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3107.0, "completions/mean_length": 686.208740234375, "completions/mean_terminated_length": 543.4732666015625, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 2.2705539358600584, "grad_norm": 0.14248910546302795, "learning_rate": 1e-06, "loss": -0.0084, "num_tokens": 142650164.0, "reward": 0.5770089626312256, "reward_std": 0.18378081917762756, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3887.0, "completions/mean_length": 624.0346069335938, "completions/mean_terminated_length": 503.7586669921875, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 2.2798833819241984, "grad_norm": 0.1280028373003006, "learning_rate": 1e-06, "loss": -0.0021, "num_tokens": 143163483.0, "reward": 0.6238839626312256, "reward_std": 0.129900723695755, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.4846802353858948, "step": 244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3291.0, "completions/mean_length": 694.8717041015625, "completions/mean_terminated_length": 572.9815063476562, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 2.2892128279883384, "grad_norm": 0.1106211245059967, "learning_rate": 1e-06, "loss": 0.0024, "num_tokens": 143742368.0, "reward": 0.6037946939468384, "reward_std": 0.13598665595054626, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2396.0, "completions/mean_length": 672.7522583007812, "completions/mean_terminated_length": 582.5635986328125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 2.298542274052478, "grad_norm": 0.1433972269296646, "learning_rate": 1e-06, "loss": 0.0177, "num_tokens": 144343938.0, "reward": 0.5691964626312256, "reward_std": 0.20079974830150604, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2332.0, "completions/mean_length": 622.880615234375, "completions/mean_terminated_length": 567.751708984375, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 2.307871720116618, "grad_norm": 0.12620839476585388, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 144940383.0, "reward": 0.5837053656578064, "reward_std": 0.15811511874198914, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3649.0, "completions/mean_length": 644.1685791015625, "completions/mean_terminated_length": 545.0918579101562, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 2.317201166180758, "grad_norm": 0.14193283021450043, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 145506158.0, "reward": 0.6026785969734192, "reward_std": 0.19986753165721893, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3798.0, "completions/mean_length": 750.5111694335938, "completions/mean_terminated_length": 606.4097900390625, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 2.326530612244898, "grad_norm": 0.11254996061325073, "learning_rate": 1e-06, "loss": -0.0109, "num_tokens": 146117720.0, "reward": 0.574776828289032, "reward_std": 0.14635655283927917, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3998.0, "completions/mean_length": 660.3582763671875, "completions/mean_terminated_length": 573.8775634765625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 2.335860058309038, "grad_norm": 0.13078922033309937, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 146702353.0, "reward": 0.660714328289032, "reward_std": 0.19174326956272125, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3579.0, "completions/mean_length": 678.640625, "completions/mean_terminated_length": 547.9652099609375, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 2.345189504373178, "grad_norm": 0.13873854279518127, "learning_rate": 1e-06, "loss": -0.0014, "num_tokens": 147267087.0, "reward": 0.5725446939468384, "reward_std": 0.17719633877277374, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 1744.0, "completions/mean_length": 649.4140625, "completions/mean_terminated_length": 525.894775390625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 2.354518950437318, "grad_norm": 0.15454351902008057, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 147807314.0, "reward": 0.6049107313156128, "reward_std": 0.186372309923172, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3047.0, "completions/mean_length": 666.1439819335938, "completions/mean_terminated_length": 563.6425170898438, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 2.363848396501458, "grad_norm": 0.11001133918762207, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 148382819.0, "reward": 0.652901828289032, "reward_std": 0.13531264662742615, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631320357322693, "step": 253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4089.0, "completions/mean_length": 704.7913208007812, "completions/mean_terminated_length": 619.4290161132812, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 2.373177842565598, "grad_norm": 0.12934285402297974, "learning_rate": 1e-06, "loss": -0.0133, "num_tokens": 149006656.0, "reward": 0.5658482313156128, "reward_std": 0.18768611550331116, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3474.0, "completions/mean_length": 643.083740234375, "completions/mean_terminated_length": 543.9758911132812, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 2.3825072886297374, "grad_norm": 0.11873723566532135, "learning_rate": 1e-06, "loss": -0.0023, "num_tokens": 149565491.0, "reward": 0.6696428656578064, "reward_std": 0.1295202374458313, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3615.0, "completions/mean_length": 709.325927734375, "completions/mean_terminated_length": 596.046142578125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 2.3918367346938774, "grad_norm": 0.12077955901622772, "learning_rate": 1e-06, "loss": -0.0145, "num_tokens": 150175959.0, "reward": 0.5703125, "reward_std": 0.16247829794883728, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3995.0, "completions/mean_length": 744.130615234375, "completions/mean_terminated_length": 587.5011596679688, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 2.4011661807580174, "grad_norm": 0.14024953544139862, "learning_rate": 1e-06, "loss": -0.0139, "num_tokens": 150767372.0, "reward": 0.59375, "reward_std": 0.187199667096138, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2258.0, "completions/mean_length": 600.0245971679688, "completions/mean_terminated_length": 536.4613647460938, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 2.4104956268221573, "grad_norm": 0.14611251652240753, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 151324362.0, "reward": 0.65625, "reward_std": 0.20647519826889038, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 626.0949096679688, "completions/mean_terminated_length": 546.873291015625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 2.4198250728862973, "grad_norm": 0.13245181739330292, "learning_rate": 1e-06, "loss": -0.0051, "num_tokens": 151894959.0, "reward": 0.6506696939468384, "reward_std": 0.1780746579170227, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2869.0, "completions/mean_length": 704.2500610351562, "completions/mean_terminated_length": 574.5538940429688, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 2.4291545189504373, "grad_norm": 0.1365259885787964, "learning_rate": 1e-06, "loss": -0.006, "num_tokens": 152482431.0, "reward": 0.5736607313156128, "reward_std": 0.20718877017498016, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2463.0, "completions/mean_length": 711.029052734375, "completions/mean_terminated_length": 565.2269897460938, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 2.4384839650145773, "grad_norm": 0.1361416131258011, "learning_rate": 1e-06, "loss": -0.0225, "num_tokens": 153063241.0, "reward": 0.6127232313156128, "reward_std": 0.18179228901863098, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2678.0, "completions/mean_length": 657.9765625, "completions/mean_terminated_length": 551.156494140625, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 2.4478134110787173, "grad_norm": 0.14126794040203094, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 153631036.0, "reward": 0.5881696939468384, "reward_std": 0.19235128164291382, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3345.0, "completions/mean_length": 676.4230346679688, "completions/mean_terminated_length": 557.9619140625, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 2.4571428571428573, "grad_norm": 0.12177924811840057, "learning_rate": 1e-06, "loss": 0.0084, "num_tokens": 154199039.0, "reward": 0.637276828289032, "reward_std": 0.1572147011756897, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2730.0, "completions/mean_length": 685.5011596679688, "completions/mean_terminated_length": 550.9802856445312, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 2.466472303206997, "grad_norm": 0.14432471990585327, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 154768216.0, "reward": 0.598214328289032, "reward_std": 0.18051277101039886, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2733.0, "completions/mean_length": 725.1328735351562, "completions/mean_terminated_length": 575.84033203125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 2.4758017492711373, "grad_norm": 0.1281932145357132, "learning_rate": 1e-06, "loss": 0.006, "num_tokens": 155351031.0, "reward": 0.590401828289032, "reward_std": 0.1468462198972702, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3313.0, "completions/mean_length": 671.2645263671875, "completions/mean_terminated_length": 548.5283203125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 2.485131195335277, "grad_norm": 0.12536077201366425, "learning_rate": 1e-06, "loss": 0.0066, "num_tokens": 155908852.0, "reward": 0.660714328289032, "reward_std": 0.1725817173719406, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3356.0, "completions/mean_length": 637.2098388671875, "completions/mean_terminated_length": 574.3226928710938, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 2.494460641399417, "grad_norm": 0.13122303783893585, "learning_rate": 1e-06, "loss": 0.0074, "num_tokens": 156500464.0, "reward": 0.6160714626312256, "reward_std": 0.1717965006828308, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4065.0, "completions/mean_length": 856.1016235351562, "completions/mean_terminated_length": 631.8604125976562, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 2.503790087463557, "grad_norm": 0.12117322534322739, "learning_rate": 1e-06, "loss": -0.015, "num_tokens": 157128939.0, "reward": 0.5803571939468384, "reward_std": 0.1734090894460678, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761127948761, "step": 268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3355.0, "completions/mean_length": 662.0614013671875, "completions/mean_terminated_length": 567.54931640625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 2.513119533527697, "grad_norm": 0.1250961571931839, "learning_rate": 1e-06, "loss": -0.0073, "num_tokens": 157708682.0, "reward": 0.6238839626312256, "reward_std": 0.16825860738754272, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.48468026518821716, "step": 269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2995.0, "completions/mean_length": 689.9475708007812, "completions/mean_terminated_length": 580.0748901367188, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 2.522448979591837, "grad_norm": 0.125711128115654, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 158305963.0, "reward": 0.5915178656578064, "reward_std": 0.17355993390083313, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 759.0469360351562, "completions/mean_terminated_length": 631.4461059570312, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 2.5317784256559768, "grad_norm": 0.13090497255325317, "learning_rate": 1e-06, "loss": 0.0045, "num_tokens": 158961901.0, "reward": 0.5691964626312256, "reward_std": 0.2081337720155716, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2411.0, "completions/mean_length": 663.7991333007812, "completions/mean_terminated_length": 553.0829467773438, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 2.5411078717201168, "grad_norm": 0.12194574624300003, "learning_rate": 1e-06, "loss": -0.0086, "num_tokens": 159537065.0, "reward": 0.6127232313156128, "reward_std": 0.14553029835224152, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3856.0, "completions/mean_length": 683.8850708007812, "completions/mean_terminated_length": 577.8699951171875, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 2.5504373177842563, "grad_norm": 0.13890667259693146, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 160121370.0, "reward": 0.5959821939468384, "reward_std": 0.22105351090431213, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3950.0, "completions/mean_length": 850.3705444335938, "completions/mean_terminated_length": 658.5484619140625, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 2.5597667638483967, "grad_norm": 0.13573136925697327, "learning_rate": 1e-06, "loss": -0.0225, "num_tokens": 160763310.0, "reward": 0.5457589626312256, "reward_std": 0.20929309725761414, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981798231601715, "step": 274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2766.0, "completions/mean_length": 709.935302734375, "completions/mean_terminated_length": 580.45654296875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 2.5690962099125363, "grad_norm": 0.14654983580112457, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 161351324.0, "reward": 0.5569196939468384, "reward_std": 0.21263712644577026, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4027.0, "completions/mean_length": 710.7522583007812, "completions/mean_terminated_length": 597.5201416015625, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 2.5784256559766763, "grad_norm": 0.1251104772090912, "learning_rate": 1e-06, "loss": -0.0156, "num_tokens": 161966646.0, "reward": 0.629464328289032, "reward_std": 0.16311588883399963, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3229.0, "completions/mean_length": 686.9375610351562, "completions/mean_terminated_length": 568.8406372070312, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 2.5877551020408163, "grad_norm": 0.12175661325454712, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 162546198.0, "reward": 0.6127232313156128, "reward_std": 0.14545612037181854, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3774.0, "completions/mean_length": 721.3504638671875, "completions/mean_terminated_length": 600.4092407226562, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 2.5970845481049563, "grad_norm": 0.12625743448734283, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 163158144.0, "reward": 0.5535714626312256, "reward_std": 0.19624477624893188, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994791507721, "step": 278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2079.0, "completions/mean_length": 699.4453735351562, "completions/mean_terminated_length": 553.1443481445312, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 2.6064139941690962, "grad_norm": 0.14673063158988953, "learning_rate": 1e-06, "loss": -0.0063, "num_tokens": 163720319.0, "reward": 0.6439732313156128, "reward_std": 0.1889663189649582, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3980.0, "completions/mean_length": 682.8538208007812, "completions/mean_terminated_length": 564.615478515625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 2.6157434402332362, "grad_norm": 0.1423073261976242, "learning_rate": 1e-06, "loss": 0.0072, "num_tokens": 164293660.0, "reward": 0.59375, "reward_std": 0.18821631371974945, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3634.0, "completions/mean_length": 622.4832763671875, "completions/mean_terminated_length": 518.677001953125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 2.6250728862973762, "grad_norm": 0.13733068108558655, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 164825829.0, "reward": 0.684151828289032, "reward_std": 0.17292234301567078, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3279.0, "completions/mean_length": 651.2421875, "completions/mean_terminated_length": 572.5947265625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 2.6344023323615158, "grad_norm": 0.134793221950531, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 165412086.0, "reward": 0.6986607313156128, "reward_std": 0.17134006321430206, "rewards/verify_math_reward/mean": 0.6986607313156128, "rewards/verify_math_reward/std": 0.4590960144996643, "step": 282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3373.0, "completions/mean_length": 703.3582763671875, "completions/mean_terminated_length": 557.225830078125, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 2.643731778425656, "grad_norm": 0.14702339470386505, "learning_rate": 1e-06, "loss": -0.0083, "num_tokens": 165971255.0, "reward": 0.6473214626312256, "reward_std": 0.19043126702308655, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3780.0, "completions/mean_length": 627.9967041015625, "completions/mean_terminated_length": 548.8184814453125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 2.6530612244897958, "grad_norm": 0.14755719900131226, "learning_rate": 1e-06, "loss": 0.0125, "num_tokens": 166547276.0, "reward": 0.6618303656578064, "reward_std": 0.2054309993982315, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3496.0, "completions/mean_length": 752.3772583007812, "completions/mean_terminated_length": 587.936767578125, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 2.6623906705539357, "grad_norm": 0.1372850388288498, "learning_rate": 1e-06, "loss": -0.0177, "num_tokens": 167138942.0, "reward": 0.6037946939468384, "reward_std": 0.1795377880334854, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.049107142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 834.3270263671875, "completions/mean_terminated_length": 665.8837890625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 2.6717201166180757, "grad_norm": 0.11591921746730804, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 167795235.0, "reward": 0.4966517984867096, "reward_std": 0.18193678557872772, "rewards/verify_math_reward/mean": 0.4966517984867096, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3330.0, "completions/mean_length": 740.8438110351562, "completions/mean_terminated_length": 608.5057983398438, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 2.6810495626822157, "grad_norm": 0.1284467726945877, "learning_rate": 1e-06, "loss": -0.0119, "num_tokens": 168414383.0, "reward": 0.5491071939468384, "reward_std": 0.19166797399520874, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3599.0, "completions/mean_length": 717.2232666015625, "completions/mean_terminated_length": 596.1340942382812, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 2.6903790087463557, "grad_norm": 0.12711331248283386, "learning_rate": 1e-06, "loss": -0.0121, "num_tokens": 169014887.0, "reward": 0.5803571939468384, "reward_std": 0.1764167845249176, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761127948761, "step": 288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2618.0, "completions/mean_length": 737.8783569335938, "completions/mean_terminated_length": 601.3693237304688, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 2.6997084548104957, "grad_norm": 0.12846635282039642, "learning_rate": 1e-06, "loss": 0.0049, "num_tokens": 169616954.0, "reward": 0.5446428656578064, "reward_std": 0.19043126702308655, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 672.2310791015625, "completions/mean_terminated_length": 561.786865234375, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 2.7090379008746357, "grad_norm": 0.143126979470253, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 170192033.0, "reward": 0.6328125, "reward_std": 0.20628906786441803, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2952.0, "completions/mean_length": 676.6707763671875, "completions/mean_terminated_length": 578.5269775390625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 2.7183673469387752, "grad_norm": 0.13283273577690125, "learning_rate": 1e-06, "loss": -0.0192, "num_tokens": 170786226.0, "reward": 0.6350446939468384, "reward_std": 0.16361257433891296, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3502.0, "completions/mean_length": 749.4386596679688, "completions/mean_terminated_length": 613.3995361328125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 2.7276967930029157, "grad_norm": 0.13216552138328552, "learning_rate": 1e-06, "loss": -0.0158, "num_tokens": 171416731.0, "reward": 0.5546875, "reward_std": 0.19084130227565765, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3361.0, "completions/mean_length": 826.5346069335938, "completions/mean_terminated_length": 649.5988159179688, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 2.7370262390670552, "grad_norm": 0.13431333005428314, "learning_rate": 1e-06, "loss": -0.0139, "num_tokens": 172059850.0, "reward": 0.578125, "reward_std": 0.21409814059734344, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4036.0, "completions/mean_length": 732.0123291015625, "completions/mean_terminated_length": 595.2647705078125, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 2.746355685131195, "grad_norm": 0.13596613705158234, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 172673205.0, "reward": 0.5647321939468384, "reward_std": 0.1773567646741867, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3955.0, "completions/mean_length": 710.5457763671875, "completions/mean_terminated_length": 589.2173461914062, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 2.755685131195335, "grad_norm": 0.13402144610881805, "learning_rate": 1e-06, "loss": -0.0041, "num_tokens": 173268798.0, "reward": 0.6361607313156128, "reward_std": 0.1869696080684662, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 715.3951416015625, "completions/mean_terminated_length": 598.2840576171875, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 2.765014577259475, "grad_norm": 0.13711683452129364, "learning_rate": 1e-06, "loss": 0.0119, "num_tokens": 173875576.0, "reward": 0.6205357313156128, "reward_std": 0.18092577159404755, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4085.0, "completions/mean_length": 650.9855346679688, "completions/mean_terminated_length": 588.3488159179688, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 2.774344023323615, "grad_norm": 0.1356029063463211, "learning_rate": 1e-06, "loss": 0.0124, "num_tokens": 174481611.0, "reward": 0.5524553656578064, "reward_std": 0.19230668246746063, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3929.0, "completions/mean_length": 730.5736694335938, "completions/mean_terminated_length": 613.9884643554688, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 2.783673469387755, "grad_norm": 0.12608329951763153, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 175105597.0, "reward": 0.5636160969734192, "reward_std": 0.1888921558856964, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3211.0, "completions/mean_length": 690.0904541015625, "completions/mean_terminated_length": 596.3497314453125, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 2.793002915451895, "grad_norm": 0.1373504251241684, "learning_rate": 1e-06, "loss": -0.0018, "num_tokens": 175713446.0, "reward": 0.5959821939468384, "reward_std": 0.19411678612232208, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3949.0, "completions/mean_length": 787.927490234375, "completions/mean_terminated_length": 633.3446044921875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 2.8023323615160347, "grad_norm": 0.13010147213935852, "learning_rate": 1e-06, "loss": -0.0057, "num_tokens": 176339549.0, "reward": 0.6082589626312256, "reward_std": 0.197828471660614, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.4884119927883148, "step": 300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3866.0, "completions/mean_length": 710.0156860351562, "completions/mean_terminated_length": 584.6088256835938, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 2.811661807580175, "grad_norm": 0.13526295125484467, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 176925299.0, "reward": 0.5948660969734192, "reward_std": 0.17400822043418884, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2630.0, "completions/mean_length": 766.9464721679688, "completions/mean_terminated_length": 607.3076171875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 2.8209912536443147, "grad_norm": 0.13222624361515045, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 177528235.0, "reward": 0.5714285969734192, "reward_std": 0.20474882423877716, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3639.0, "completions/mean_length": 815.3839721679688, "completions/mean_terminated_length": 658.0678100585938, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 2.8303206997084547, "grad_norm": 0.13167433440685272, "learning_rate": 1e-06, "loss": 0.004, "num_tokens": 178180747.0, "reward": 0.574776828289032, "reward_std": 0.18655845522880554, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2785.0, "completions/mean_length": 772.6763916015625, "completions/mean_terminated_length": 588.6996459960938, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 2.8396501457725947, "grad_norm": 0.13399431109428406, "learning_rate": 1e-06, "loss": -0.0236, "num_tokens": 178771577.0, "reward": 0.6395089626312256, "reward_std": 0.20369574427604675, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111123085022, "step": 304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4059.0, "completions/mean_length": 773.997802734375, "completions/mean_terminated_length": 594.2188110351562, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 2.8489795918367347, "grad_norm": 0.13628385961055756, "learning_rate": 1e-06, "loss": -0.0091, "num_tokens": 179375927.0, "reward": 0.5892857313156128, "reward_std": 0.18911674618721008, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 757.5892944335938, "completions/mean_terminated_length": 572.7774047851562, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 2.8583090379008746, "grad_norm": 0.1519719660282135, "learning_rate": 1e-06, "loss": -0.0135, "num_tokens": 179945351.0, "reward": 0.6104910969734192, "reward_std": 0.22432535886764526, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791125416755676, "step": 306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3822.0, "completions/mean_length": 735.1261596679688, "completions/mean_terminated_length": 598.5051879882812, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 2.8676384839650146, "grad_norm": 0.12454867362976074, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 180544224.0, "reward": 0.6071428656578064, "reward_std": 0.16111847758293152, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865824937820435, "step": 307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2904.0, "completions/mean_length": 703.9564819335938, "completions/mean_terminated_length": 586.4491577148438, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 2.8769679300291546, "grad_norm": 0.1262216717004776, "learning_rate": 1e-06, "loss": -0.0226, "num_tokens": 181138897.0, "reward": 0.6540178656578064, "reward_std": 0.16389724612236023, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2071.0, "completions/mean_length": 687.1428833007812, "completions/mean_terminated_length": 589.2996215820312, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 2.8862973760932946, "grad_norm": 0.14330852031707764, "learning_rate": 1e-06, "loss": -0.003, "num_tokens": 181739313.0, "reward": 0.5714285969734192, "reward_std": 0.2055736929178238, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2574.0, "completions/mean_length": 757.286865234375, "completions/mean_terminated_length": 641.6270141601562, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 2.8956268221574346, "grad_norm": 0.12904781103134155, "learning_rate": 1e-06, "loss": -0.0127, "num_tokens": 182380298.0, "reward": 0.5558035969734192, "reward_std": 0.20741517841815948, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3326.0, "completions/mean_length": 696.5201416015625, "completions/mean_terminated_length": 574.6890258789062, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 2.904956268221574, "grad_norm": 0.1322242170572281, "learning_rate": 1e-06, "loss": -0.0087, "num_tokens": 182976588.0, "reward": 0.660714328289032, "reward_std": 0.16108639538288116, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2593.0, "completions/mean_length": 703.1808471679688, "completions/mean_terminated_length": 581.5884399414062, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 2.914285714285714, "grad_norm": 0.1302194595336914, "learning_rate": 1e-06, "loss": -0.0103, "num_tokens": 183570006.0, "reward": 0.6328125, "reward_std": 0.16622911393642426, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3008.0, "completions/mean_length": 846.943115234375, "completions/mean_terminated_length": 634.4601440429688, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 2.923615160349854, "grad_norm": 0.13785329461097717, "learning_rate": 1e-06, "loss": -0.0443, "num_tokens": 184197243.0, "reward": 0.5636160969734192, "reward_std": 0.1965164840221405, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3083.0, "completions/mean_length": 844.5457763671875, "completions/mean_terminated_length": 640.1245727539062, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 2.932944606413994, "grad_norm": 0.13741254806518555, "learning_rate": 1e-06, "loss": 0.0042, "num_tokens": 184826340.0, "reward": 0.5345982313156128, "reward_std": 0.19531960785388947, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3755.0, "completions/mean_length": 752.6060791015625, "completions/mean_terminated_length": 604.5303344726562, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 2.942274052478134, "grad_norm": 0.14405564963817596, "learning_rate": 1e-06, "loss": 0.0138, "num_tokens": 185439739.0, "reward": 0.5725446939468384, "reward_std": 0.19828493893146515, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3917.0, "completions/mean_length": 671.7924194335938, "completions/mean_terminated_length": 581.5784912109375, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 2.951603498542274, "grad_norm": 0.13511620461940765, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 186042529.0, "reward": 0.6205357313156128, "reward_std": 0.18201276659965515, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2425.0, "completions/mean_length": 769.5111694335938, "completions/mean_terminated_length": 650.2959594726562, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 2.960932944606414, "grad_norm": 0.1378905326128006, "learning_rate": 1e-06, "loss": -0.0012, "num_tokens": 186692347.0, "reward": 0.574776828289032, "reward_std": 0.21744313836097717, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3056.0, "completions/mean_length": 670.747802734375, "completions/mean_terminated_length": 560.2557373046875, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 2.970262390670554, "grad_norm": 0.13824069499969482, "learning_rate": 1e-06, "loss": -0.0149, "num_tokens": 187267801.0, "reward": 0.637276828289032, "reward_std": 0.1700248271226883, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3957.0, "completions/mean_length": 743.5469360351562, "completions/mean_terminated_length": 599.1455078125, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 2.979591836734694, "grad_norm": 0.147800013422966, "learning_rate": 1e-06, "loss": -0.0195, "num_tokens": 187872251.0, "reward": 0.598214328289032, "reward_std": 0.2073000818490982, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3505.0, "completions/mean_length": 719.9676513671875, "completions/mean_terminated_length": 603.0150146484375, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 2.9889212827988336, "grad_norm": 0.1389048844575882, "learning_rate": 1e-06, "loss": -0.0203, "num_tokens": 188491166.0, "reward": 0.6484375, "reward_std": 0.1920802742242813, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.045454545454545414, "completions/max_length": 4096.0, "completions/max_terminated_length": 2906.0, "completions/mean_length": 778.6278686523438, "completions/mean_terminated_length": 620.6577758789062, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 2.9982507288629736, "grad_norm": 0.1287021040916443, "learning_rate": 1e-06, "loss": -0.0073, "num_tokens": 189114243.0, "reward": 0.6328125, "reward_std": 0.17979742586612701, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3831.0, "completions/mean_length": 921.7031860351562, "completions/mean_terminated_length": 693.8827514648438, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 3.00932944606414, "grad_norm": 0.13434647023677826, "learning_rate": 1e-06, "loss": -0.0095, "num_tokens": 189784617.0, "reward": 0.5100446939468384, "reward_std": 0.20407551527023315, "rewards/verify_math_reward/mean": 0.5100446343421936, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0435267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2655.0, "completions/mean_length": 795.4654541015625, "completions/mean_terminated_length": 645.2660522460938, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 3.01865889212828, "grad_norm": 0.12008710950613022, "learning_rate": 1e-06, "loss": -0.0149, "num_tokens": 190420106.0, "reward": 0.6160714626312256, "reward_std": 0.17209820449352264, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4028.0, "completions/mean_length": 761.6808471679688, "completions/mean_terminated_length": 601.7894897460938, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 3.02798833819242, "grad_norm": 0.13155941665172577, "learning_rate": 1e-06, "loss": -0.0158, "num_tokens": 191024300.0, "reward": 0.5691964626312256, "reward_std": 0.1790817528963089, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652786254883, "step": 324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3180.0, "completions/mean_length": 740.3035888671875, "completions/mean_terminated_length": 607.9442749023438, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 3.03731778425656, "grad_norm": 0.13252215087413788, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 191638940.0, "reward": 0.629464328289032, "reward_std": 0.1604149043560028, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3468.0, "completions/mean_length": 683.068115234375, "completions/mean_terminated_length": 577.0276489257812, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 3.0466472303206995, "grad_norm": 0.13193079829216003, "learning_rate": 1e-06, "loss": -0.0047, "num_tokens": 192220641.0, "reward": 0.6171875, "reward_std": 0.16788770258426666, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3660.0, "completions/mean_length": 777.6105346679688, "completions/mean_terminated_length": 642.7166137695312, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 3.0559766763848395, "grad_norm": 0.12973052263259888, "learning_rate": 1e-06, "loss": -0.0079, "num_tokens": 192866172.0, "reward": 0.609375, "reward_std": 0.16398420929908752, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2679.0, "completions/mean_length": 752.8348388671875, "completions/mean_terminated_length": 608.8335571289062, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 3.0653061224489795, "grad_norm": 0.1253746896982193, "learning_rate": 1e-06, "loss": -0.0106, "num_tokens": 193481912.0, "reward": 0.6004464626312256, "reward_std": 0.16642162203788757, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3062.0, "completions/mean_length": 734.8326416015625, "completions/mean_terminated_length": 573.65380859375, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 3.0746355685131195, "grad_norm": 0.12909363210201263, "learning_rate": 1e-06, "loss": -0.0075, "num_tokens": 194063194.0, "reward": 0.6428571939468384, "reward_std": 0.15097863972187042, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3504.0, "completions/mean_length": 821.8214721679688, "completions/mean_terminated_length": 640.5653686523438, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 3.0839650145772595, "grad_norm": 0.13262464106082916, "learning_rate": 1e-06, "loss": -0.0178, "num_tokens": 194702770.0, "reward": 0.5401785969734192, "reward_std": 0.20598556101322174, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3871.0, "completions/mean_length": 725.6373291015625, "completions/mean_terminated_length": 600.8090209960938, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 3.0932944606413995, "grad_norm": 0.1538906842470169, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 195308645.0, "reward": 0.6462053656578064, "reward_std": 0.20779426395893097, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2818.0, "completions/mean_length": 733.4788208007812, "completions/mean_terminated_length": 600.850341796875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 3.1026239067055394, "grad_norm": 0.13098306953907013, "learning_rate": 1e-06, "loss": -0.0074, "num_tokens": 195913794.0, "reward": 0.6361607313156128, "reward_std": 0.17325936257839203, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2617.0, "completions/mean_length": 677.46875, "completions/mean_terminated_length": 542.6310424804688, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 3.1119533527696794, "grad_norm": 0.12563659250736237, "learning_rate": 1e-06, "loss": -0.0169, "num_tokens": 196476870.0, "reward": 0.660714328289032, "reward_std": 0.148612841963768, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313389778137, "step": 333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3885.0, "completions/mean_length": 813.8158569335938, "completions/mean_terminated_length": 648.35986328125, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 3.1212827988338194, "grad_norm": 0.13097511231899261, "learning_rate": 1e-06, "loss": -0.0096, "num_tokens": 197130753.0, "reward": 0.5814732313156128, "reward_std": 0.17179329693317413, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3680.0, "completions/mean_length": 707.6663208007812, "completions/mean_terminated_length": 561.7194213867188, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 3.130612244897959, "grad_norm": 0.14367610216140747, "learning_rate": 1e-06, "loss": -0.0061, "num_tokens": 197694814.0, "reward": 0.6930803656578064, "reward_std": 0.18543826043605804, "rewards/verify_math_reward/mean": 0.6930803656578064, "rewards/verify_math_reward/std": 0.46147334575653076, "step": 335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3441.0, "completions/mean_length": 784.130615234375, "completions/mean_terminated_length": 637.4510498046875, "completions/min_length": 194.0, "completions/min_terminated_length": 194.0, "epoch": 3.139941690962099, "grad_norm": 0.12637105584144592, "learning_rate": 1e-06, "loss": -0.0138, "num_tokens": 198332083.0, "reward": 0.5680803656578064, "reward_std": 0.1907668560743332, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2695.0, "completions/mean_length": 718.239990234375, "completions/mean_terminated_length": 572.74853515625, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 3.149271137026239, "grad_norm": 0.13643290102481842, "learning_rate": 1e-06, "loss": -0.0233, "num_tokens": 198911378.0, "reward": 0.6696428656578064, "reward_std": 0.17818161845207214, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3612.0, "completions/mean_length": 953.9129638671875, "completions/mean_terminated_length": 699.9686279296875, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 3.158600583090379, "grad_norm": 0.14042812585830688, "learning_rate": 1e-06, "loss": -0.0225, "num_tokens": 199606236.0, "reward": 0.5078125, "reward_std": 0.20572523772716522, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3683.0, "completions/mean_length": 801.0625610351562, "completions/mean_terminated_length": 639.016357421875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 3.167930029154519, "grad_norm": 0.12723299860954285, "learning_rate": 1e-06, "loss": 0.0063, "num_tokens": 200244484.0, "reward": 0.625, "reward_std": 0.1662386953830719, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.049107142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3886.0, "completions/mean_length": 784.4241333007812, "completions/mean_terminated_length": 613.4037475585938, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 3.177259475218659, "grad_norm": 0.14140678942203522, "learning_rate": 1e-06, "loss": -0.01, "num_tokens": 200852000.0, "reward": 0.6272321939468384, "reward_std": 0.18426865339279175, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111400604248, "step": 340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3868.0, "completions/mean_length": 730.6752319335938, "completions/mean_terminated_length": 622.1163330078125, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 3.186588921282799, "grad_norm": 0.1404697149991989, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 201474669.0, "reward": 0.6205357313156128, "reward_std": 0.2093295454978943, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3940.0, "completions/mean_length": 855.9699096679688, "completions/mean_terminated_length": 672.5719604492188, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 3.195918367346939, "grad_norm": 0.1275302618741989, "learning_rate": 1e-06, "loss": -0.0152, "num_tokens": 202138482.0, "reward": 0.5725446939468384, "reward_std": 0.20038749277591705, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2908.0, "completions/mean_length": 827.2846069335938, "completions/mean_terminated_length": 613.5160522460938, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 3.205247813411079, "grad_norm": 0.14738671481609344, "learning_rate": 1e-06, "loss": -0.0267, "num_tokens": 202740553.0, "reward": 0.6417410969734192, "reward_std": 0.21538016200065613, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975653409957886, "step": 343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2358.0, "completions/mean_length": 794.0335083007812, "completions/mean_terminated_length": 631.6416625976562, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 3.2145772594752184, "grad_norm": 0.13328631222248077, "learning_rate": 1e-06, "loss": -0.0075, "num_tokens": 203368535.0, "reward": 0.5792410969734192, "reward_std": 0.17145699262619019, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3802.0, "completions/mean_length": 832.3995971679688, "completions/mean_terminated_length": 695.78369140625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 3.2239067055393584, "grad_norm": 0.1348450630903244, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 204061693.0, "reward": 0.574776828289032, "reward_std": 0.18201346695423126, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2567.0, "completions/mean_length": 774.4877319335938, "completions/mean_terminated_length": 615.2105102539062, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 3.2332361516034984, "grad_norm": 0.13413555920124054, "learning_rate": 1e-06, "loss": 0.0046, "num_tokens": 204671650.0, "reward": 0.660714328289032, "reward_std": 0.14530597627162933, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3891.0, "completions/mean_length": 791.2344360351562, "completions/mean_terminated_length": 628.7048950195312, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 3.2425655976676384, "grad_norm": 0.13582055270671844, "learning_rate": 1e-06, "loss": -0.0156, "num_tokens": 205299796.0, "reward": 0.6104910969734192, "reward_std": 0.19294606149196625, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791125416755676, "step": 347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2645.0, "completions/mean_length": 706.6217041015625, "completions/mean_terminated_length": 572.933837890625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 3.2518950437317784, "grad_norm": 0.13521024584770203, "learning_rate": 1e-06, "loss": -0.0113, "num_tokens": 205884865.0, "reward": 0.645089328289032, "reward_std": 0.18464843928813934, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4049.0, "completions/mean_length": 996.7288208007812, "completions/mean_terminated_length": 721.8238525390625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 3.2612244897959184, "grad_norm": 0.12205115705728531, "learning_rate": 1e-06, "loss": -0.0238, "num_tokens": 206573774.0, "reward": 0.5089285969734192, "reward_std": 0.17626340687274933, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.049107142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2760.0, "completions/mean_length": 789.8225708007812, "completions/mean_terminated_length": 619.0809936523438, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 3.2705539358600584, "grad_norm": 0.13562090694904327, "learning_rate": 1e-06, "loss": -0.027, "num_tokens": 207193695.0, "reward": 0.5558035969734192, "reward_std": 0.18370482325553894, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3762.0, "completions/mean_length": 830.6975708007812, "completions/mean_terminated_length": 637.7127685546875, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 3.2798833819241984, "grad_norm": 0.13589246571063995, "learning_rate": 1e-06, "loss": -0.0036, "num_tokens": 207819856.0, "reward": 0.5803571939468384, "reward_std": 0.1640915721654892, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761127948761, "step": 351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2989.0, "completions/mean_length": 877.3136596679688, "completions/mean_terminated_length": 699.1295776367188, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 3.2892128279883384, "grad_norm": 0.1251857578754425, "learning_rate": 1e-06, "loss": -0.0371, "num_tokens": 208508065.0, "reward": 0.6015625, "reward_std": 0.19414816796779633, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3697.0, "completions/mean_length": 823.2980346679688, "completions/mean_terminated_length": 617.5409545898438, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 3.298542274052478, "grad_norm": 0.14291563630104065, "learning_rate": 1e-06, "loss": -0.0214, "num_tokens": 209122172.0, "reward": 0.6339285969734192, "reward_std": 0.19520379602909088, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199838399887085, "step": 353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 848.1451416015625, "completions/mean_terminated_length": 656.1914672851562, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 3.307871720116618, "grad_norm": 0.13552476465702057, "learning_rate": 1e-06, "loss": -0.0086, "num_tokens": 209768230.0, "reward": 0.5602678656578064, "reward_std": 0.19181881844997406, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3176.0, "completions/mean_length": 659.3795166015625, "completions/mean_terminated_length": 560.7393798828125, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 3.317201166180758, "grad_norm": 0.13598616421222687, "learning_rate": 1e-06, "loss": 0.0058, "num_tokens": 210334002.0, "reward": 0.7042410969734192, "reward_std": 0.14478211104869843, "rewards/verify_math_reward/mean": 0.7042410969734192, "rewards/verify_math_reward/std": 0.45663803815841675, "step": 355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3969.0, "completions/mean_length": 791.5814819335938, "completions/mean_terminated_length": 633.1239624023438, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 3.326530612244898, "grad_norm": 0.1426772177219391, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 210966083.0, "reward": 0.6584821939468384, "reward_std": 0.20719128847122192, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3949.0, "completions/mean_length": 876.513427734375, "completions/mean_terminated_length": 694.2783203125, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 3.335860058309038, "grad_norm": 0.13519161939620972, "learning_rate": 1e-06, "loss": -0.0165, "num_tokens": 211651935.0, "reward": 0.546875, "reward_std": 0.19663341343402863, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 357 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3788.0, "completions/mean_length": 769.3594360351562, "completions/mean_terminated_length": 601.662353515625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 3.345189504373178, "grad_norm": 0.14134107530117035, "learning_rate": 1e-06, "loss": -0.0208, "num_tokens": 212248921.0, "reward": 0.65625, "reward_std": 0.18475720286369324, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3232.0, "completions/mean_length": 898.0324096679688, "completions/mean_terminated_length": 656.1692504882812, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 3.354518950437318, "grad_norm": 0.12622913718223572, "learning_rate": 1e-06, "loss": -0.0161, "num_tokens": 212890230.0, "reward": 0.5870535969734192, "reward_std": 0.16855987906455994, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3392.0, "completions/mean_length": 845.3672485351562, "completions/mean_terminated_length": 624.525634765625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 3.363848396501458, "grad_norm": 0.13135181367397308, "learning_rate": 1e-06, "loss": -0.0056, "num_tokens": 213505119.0, "reward": 0.6171875, "reward_std": 0.16390681266784668, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3638.0, "completions/mean_length": 812.7600708007812, "completions/mean_terminated_length": 585.5191040039062, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 3.373177842565598, "grad_norm": 0.14955846965312958, "learning_rate": 1e-06, "loss": -0.0245, "num_tokens": 214096504.0, "reward": 0.6272321939468384, "reward_std": 0.18513557314872742, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3908.0, "completions/mean_length": 715.8527221679688, "completions/mean_terminated_length": 582.5289916992188, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 3.3825072886297374, "grad_norm": 0.15491895377635956, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 214684796.0, "reward": 0.645089328289032, "reward_std": 0.21222344040870667, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2995.0, "completions/mean_length": 782.0413208007812, "completions/mean_terminated_length": 635.2692260742188, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 3.3918367346938774, "grad_norm": 0.14674150943756104, "learning_rate": 1e-06, "loss": -0.0152, "num_tokens": 215319953.0, "reward": 0.543526828289032, "reward_std": 0.20241807401180267, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 363 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3937.0, "completions/mean_length": 815.5201416015625, "completions/mean_terminated_length": 621.6383056640625, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 3.4011661807580174, "grad_norm": 0.13411541283130646, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 215943707.0, "reward": 0.5892857313156128, "reward_std": 0.17900757491588593, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223822355270386, "step": 364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0435267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3950.0, "completions/mean_length": 784.443115234375, "completions/mean_terminated_length": 633.7421264648438, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 3.4104956268221573, "grad_norm": 0.12934422492980957, "learning_rate": 1e-06, "loss": -0.0098, "num_tokens": 216571784.0, "reward": 0.6395089626312256, "reward_std": 0.16473278403282166, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3971.0, "completions/mean_length": 796.396240234375, "completions/mean_terminated_length": 678.1444702148438, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 3.4198250728862973, "grad_norm": 0.1159258708357811, "learning_rate": 1e-06, "loss": 0.0024, "num_tokens": 217260379.0, "reward": 0.5479910969734192, "reward_std": 0.18103523552417755, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3105.0, "completions/mean_length": 846.7154541015625, "completions/mean_terminated_length": 638.3289794921875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 3.4291545189504373, "grad_norm": 0.1376204788684845, "learning_rate": 1e-06, "loss": -0.0124, "num_tokens": 217896060.0, "reward": 0.5859375, "reward_std": 0.16029544174671173, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 704.5178833007812, "completions/mean_terminated_length": 570.7470703125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 3.4384839650145773, "grad_norm": 0.14520323276519775, "learning_rate": 1e-06, "loss": -0.0186, "num_tokens": 218472124.0, "reward": 0.6930803656578064, "reward_std": 0.17577669024467468, "rewards/verify_math_reward/mean": 0.6930803656578064, "rewards/verify_math_reward/std": 0.46147337555885315, "step": 368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2264.0, "completions/mean_length": 805.7455444335938, "completions/mean_terminated_length": 578.0191040039062, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 3.4478134110787173, "grad_norm": 0.1559911072254181, "learning_rate": 1e-06, "loss": -0.0396, "num_tokens": 219056856.0, "reward": 0.5814732313156128, "reward_std": 0.1884777694940567, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3876.0, "completions/mean_length": 855.8839721679688, "completions/mean_terminated_length": 652.1755981445312, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 3.4571428571428573, "grad_norm": 0.13544338941574097, "learning_rate": 1e-06, "loss": -0.028, "num_tokens": 219692504.0, "reward": 0.5892857313156128, "reward_std": 0.1836727261543274, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3912.0, "completions/mean_length": 712.3582763671875, "completions/mean_terminated_length": 595.1420288085938, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 3.466472303206997, "grad_norm": 0.12585312128067017, "learning_rate": 1e-06, "loss": 0.0096, "num_tokens": 220293105.0, "reward": 0.6707589626312256, "reward_std": 0.17404918372631073, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 826.2098388671875, "completions/mean_terminated_length": 608.2238159179688, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 3.4758017492711373, "grad_norm": 0.14365816116333008, "learning_rate": 1e-06, "loss": -0.0285, "num_tokens": 220900005.0, "reward": 0.6104910969734192, "reward_std": 0.20459476113319397, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791125416755676, "step": 372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3636.0, "completions/mean_length": 827.7076416015625, "completions/mean_terminated_length": 630.44970703125, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 3.485131195335277, "grad_norm": 0.14375828206539154, "learning_rate": 1e-06, "loss": -0.018, "num_tokens": 221522103.0, "reward": 0.613839328289032, "reward_std": 0.18352049589157104, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3994.0, "completions/mean_length": 876.318115234375, "completions/mean_terminated_length": 632.8126831054688, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 3.494460641399417, "grad_norm": 0.13252145051956177, "learning_rate": 1e-06, "loss": -0.0201, "num_tokens": 222146820.0, "reward": 0.6015625, "reward_std": 0.15887469053268433, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3752.0, "completions/mean_length": 815.950927734375, "completions/mean_terminated_length": 630.2877197265625, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 3.503790087463557, "grad_norm": 0.1298367977142334, "learning_rate": 1e-06, "loss": -0.0095, "num_tokens": 222759280.0, "reward": 0.566964328289032, "reward_std": 0.16679365932941437, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3828.0, "completions/mean_length": 863.8660888671875, "completions/mean_terminated_length": 656.57958984375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 3.513119533527697, "grad_norm": 0.13564053177833557, "learning_rate": 1e-06, "loss": -0.0202, "num_tokens": 223400536.0, "reward": 0.645089328289032, "reward_std": 0.19433686137199402, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.049107142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3322.0, "completions/mean_length": 843.8370971679688, "completions/mean_terminated_length": 675.885009765625, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 3.522448979591837, "grad_norm": 0.14007282257080078, "learning_rate": 1e-06, "loss": -0.0381, "num_tokens": 224072686.0, "reward": 0.6004464626312256, "reward_std": 0.19309763610363007, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3827.0, "completions/mean_length": 936.2188110351562, "completions/mean_terminated_length": 630.6829833984375, "completions/min_length": 189.0, "completions/min_terminated_length": 189.0, "epoch": 3.5317784256559768, "grad_norm": 0.14223435521125793, "learning_rate": 1e-06, "loss": -0.0286, "num_tokens": 224682810.0, "reward": 0.5848214626312256, "reward_std": 0.18006137013435364, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.049107142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3849.0, "completions/mean_length": 807.4642944335938, "completions/mean_terminated_length": 637.6337890625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 3.5411078717201168, "grad_norm": 0.14385223388671875, "learning_rate": 1e-06, "loss": -0.0125, "num_tokens": 225310714.0, "reward": 0.6640625, "reward_std": 0.20113424956798553, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2413.0, "completions/mean_length": 818.2366333007812, "completions/mean_terminated_length": 599.7190551757812, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 3.5504373177842563, "grad_norm": 0.1361023336648941, "learning_rate": 1e-06, "loss": -0.0249, "num_tokens": 225914558.0, "reward": 0.606026828289032, "reward_std": 0.1762627214193344, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3871.0, "completions/mean_length": 701.216552734375, "completions/mean_terminated_length": 595.7399291992188, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 3.5597667638483967, "grad_norm": 0.15030620992183685, "learning_rate": 1e-06, "loss": 0.0088, "num_tokens": 226515488.0, "reward": 0.6551339626312256, "reward_std": 0.20752577483654022, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900800228119, "step": 381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2559.0, "completions/mean_length": 805.2801513671875, "completions/mean_terminated_length": 614.9078979492188, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 3.5690962099125363, "grad_norm": 0.13644549250602722, "learning_rate": 1e-06, "loss": -0.0447, "num_tokens": 227127147.0, "reward": 0.598214328289032, "reward_std": 0.1886281818151474, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 751.3248291015625, "completions/mean_terminated_length": 615.3623657226562, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 3.5784256559766763, "grad_norm": 0.1410011351108551, "learning_rate": 1e-06, "loss": -0.0067, "num_tokens": 227752982.0, "reward": 0.6049107313156128, "reward_std": 0.19899921119213104, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3538.0, "completions/mean_length": 792.4922485351562, "completions/mean_terminated_length": 597.2493896484375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 3.5877551020408163, "grad_norm": 0.14608652889728546, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 228351775.0, "reward": 0.6283482313156128, "reward_std": 0.16453734040260315, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159182548523, "step": 384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 772.9308471679688, "completions/mean_terminated_length": 613.5789184570312, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 3.5970845481049563, "grad_norm": 0.13708025217056274, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 228969657.0, "reward": 0.6238839626312256, "reward_std": 0.17577669024467468, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.4846802353858948, "step": 385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3409.0, "completions/mean_length": 806.5625610351562, "completions/mean_terminated_length": 644.786865234375, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 3.6064139941690962, "grad_norm": 0.13801677525043488, "learning_rate": 1e-06, "loss": -0.0027, "num_tokens": 229603593.0, "reward": 0.5948660969734192, "reward_std": 0.1791912168264389, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2749.0, "completions/mean_length": 845.9074096679688, "completions/mean_terminated_length": 641.57177734375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 3.6157434402332362, "grad_norm": 0.13870160281658173, "learning_rate": 1e-06, "loss": -0.0152, "num_tokens": 230234822.0, "reward": 0.6227678656578064, "reward_std": 0.20790626108646393, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644899368286, "step": 387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3625.0, "completions/mean_length": 825.7779541015625, "completions/mean_terminated_length": 632.5023803710938, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 3.6250728862973762, "grad_norm": 0.14085707068443298, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 230859255.0, "reward": 0.5993303656578064, "reward_std": 0.18580886721611023, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2616.0, "completions/mean_length": 697.0502319335938, "completions/mean_terminated_length": 554.7686157226562, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 3.6344023323615158, "grad_norm": 0.15169435739517212, "learning_rate": 1e-06, "loss": -0.0167, "num_tokens": 231425676.0, "reward": 0.6651785969734192, "reward_std": 0.18716758489608765, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219160199165344, "step": 389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3494.0, "completions/mean_length": 888.3616333007812, "completions/mean_terminated_length": 674.51904296875, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 3.643731778425656, "grad_norm": 0.14200334250926971, "learning_rate": 1e-06, "loss": 0.0048, "num_tokens": 232072504.0, "reward": 0.5524553656578064, "reward_std": 0.21560657024383545, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2384.0, "completions/mean_length": 844.3281860351562, "completions/mean_terminated_length": 615.1182861328125, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 3.6530612244897958, "grad_norm": 0.14360344409942627, "learning_rate": 1e-06, "loss": -0.0092, "num_tokens": 232673902.0, "reward": 0.6540178656578064, "reward_std": 0.20388302206993103, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3132.0, "completions/mean_length": 868.8236694335938, "completions/mean_terminated_length": 641.3404541015625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 3.6623906705539357, "grad_norm": 0.13329817354679108, "learning_rate": 1e-06, "loss": -0.0339, "num_tokens": 233309688.0, "reward": 0.5691964626312256, "reward_std": 0.16183848679065704, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3744.0, "completions/mean_length": 700.9074096679688, "completions/mean_terminated_length": 591.3882446289062, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 3.6717201166180757, "grad_norm": 0.11687356978654861, "learning_rate": 1e-06, "loss": -0.0187, "num_tokens": 233918197.0, "reward": 0.7064732313156128, "reward_std": 0.1381234973669052, "rewards/verify_math_reward/mean": 0.7064732313156128, "rewards/verify_math_reward/std": 0.4556320011615753, "step": 393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3973.0, "completions/mean_length": 798.2098388671875, "completions/mean_terminated_length": 644.1074829101562, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 3.6810495626822157, "grad_norm": 0.13066112995147705, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 234562081.0, "reward": 0.6183035969734192, "reward_std": 0.18340173363685608, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3591.0, "completions/mean_length": 863.0379638671875, "completions/mean_terminated_length": 639.27685546875, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 3.6903790087463557, "grad_norm": 0.12336290627717972, "learning_rate": 1e-06, "loss": -0.0232, "num_tokens": 235182019.0, "reward": 0.5892857313156128, "reward_std": 0.152669295668602, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3988.0, "completions/mean_length": 813.911865234375, "completions/mean_terminated_length": 590.9332275390625, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 3.6997084548104957, "grad_norm": 0.13793021440505981, "learning_rate": 1e-06, "loss": -0.0293, "num_tokens": 235766756.0, "reward": 0.6395089626312256, "reward_std": 0.17107973992824554, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111123085022, "step": 396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3688.0, "completions/mean_length": 735.0938110351562, "completions/mean_terminated_length": 586.242431640625, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 3.7090379008746357, "grad_norm": 0.1464046835899353, "learning_rate": 1e-06, "loss": 0.0095, "num_tokens": 236356112.0, "reward": 0.6551339626312256, "reward_std": 0.1889663189649582, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3841.0, "completions/mean_length": 872.0123291015625, "completions/mean_terminated_length": 665.2482299804688, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 3.7183673469387752, "grad_norm": 0.1336894929409027, "learning_rate": 1e-06, "loss": -0.0191, "num_tokens": 237006867.0, "reward": 0.5714285969734192, "reward_std": 0.18799132108688354, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3535.0, "completions/mean_length": 691.0391235351562, "completions/mean_terminated_length": 577.1476440429688, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 3.7276967930029157, "grad_norm": 0.11745762825012207, "learning_rate": 1e-06, "loss": -0.0188, "num_tokens": 237587446.0, "reward": 0.7031250596046448, "reward_std": 0.12320679426193237, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2893.0, "completions/mean_length": 780.8047485351562, "completions/mean_terminated_length": 633.9778442382812, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 3.7370262390670552, "grad_norm": 0.15450018644332886, "learning_rate": 1e-06, "loss": -0.0074, "num_tokens": 238211383.0, "reward": 0.6495535969734192, "reward_std": 0.2320307046175003, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3241.0, "completions/mean_length": 803.6082763671875, "completions/mean_terminated_length": 621.3439331054688, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 3.746355685131195, "grad_norm": 0.12850894033908844, "learning_rate": 1e-06, "loss": -0.0054, "num_tokens": 238820568.0, "reward": 0.6774553656578064, "reward_std": 0.16037212312221527, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 401 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2589.0, "completions/mean_length": 761.5379638671875, "completions/mean_terminated_length": 642.0369873046875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 3.755685131195335, "grad_norm": 0.12745584547519684, "learning_rate": 1e-06, "loss": -0.0159, "num_tokens": 239461130.0, "reward": 0.6350446939468384, "reward_std": 0.17607979476451874, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.4816865026950836, "step": 402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3306.0, "completions/mean_length": 825.7422485351562, "completions/mean_terminated_length": 632.4645385742188, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 3.765014577259475, "grad_norm": 0.15667924284934998, "learning_rate": 1e-06, "loss": -0.0284, "num_tokens": 240076939.0, "reward": 0.6283482313156128, "reward_std": 0.21045538783073425, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2968.0, "completions/mean_length": 906.87841796875, "completions/mean_terminated_length": 644.9697875976562, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 3.774344023323615, "grad_norm": 0.13290569186210632, "learning_rate": 1e-06, "loss": -0.0356, "num_tokens": 240695558.0, "reward": 0.65625, "reward_std": 0.15063981711864471, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4044.0, "completions/mean_length": 851.2489013671875, "completions/mean_terminated_length": 639.0475463867188, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 3.783673469387755, "grad_norm": 0.1469256579875946, "learning_rate": 1e-06, "loss": -0.0167, "num_tokens": 241334309.0, "reward": 0.6149553656578064, "reward_std": 0.21905934810638428, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3180.0, "completions/mean_length": 818.7745971679688, "completions/mean_terminated_length": 625.0850830078125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 3.793002915451895, "grad_norm": 0.13795356452465057, "learning_rate": 1e-06, "loss": -0.0188, "num_tokens": 241942995.0, "reward": 0.6026785969734192, "reward_std": 0.18986350297927856, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 406 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4073.0, "completions/mean_length": 825.0558471679688, "completions/mean_terminated_length": 652.0916748046875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 3.8023323615160347, "grad_norm": 0.13587205111980438, "learning_rate": 1e-06, "loss": -0.0142, "num_tokens": 242579757.0, "reward": 0.6707589626312256, "reward_std": 0.1813715398311615, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 407 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3190.0, "completions/mean_length": 764.341552734375, "completions/mean_terminated_length": 584.0399780273438, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 3.811661807580175, "grad_norm": 0.16450262069702148, "learning_rate": 1e-06, "loss": -0.0204, "num_tokens": 243155263.0, "reward": 0.640625, "reward_std": 0.1904633492231369, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3973.0, "completions/mean_length": 964.9732666015625, "completions/mean_terminated_length": 687.2515258789062, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 3.8209912536443147, "grad_norm": 0.12772898375988007, "learning_rate": 1e-06, "loss": -0.0336, "num_tokens": 243807335.0, "reward": 0.5390625, "reward_std": 0.18554674088954926, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2786.0, "completions/mean_length": 890.90966796875, "completions/mean_terminated_length": 648.5078125, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 3.8303206997084547, "grad_norm": 0.1236671730875969, "learning_rate": 1e-06, "loss": -0.0305, "num_tokens": 244439062.0, "reward": 0.5803571939468384, "reward_std": 0.1407584697008133, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 410 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2679.0, "completions/mean_length": 824.6395263671875, "completions/mean_terminated_length": 602.3897705078125, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 3.8396501457725947, "grad_norm": 0.12373624742031097, "learning_rate": 1e-06, "loss": -0.0081, "num_tokens": 245029283.0, "reward": 0.6305803656578064, "reward_std": 0.149286150932312, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 411 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3963.0, "completions/mean_length": 812.9129638671875, "completions/mean_terminated_length": 618.8770751953125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 3.8489795918367347, "grad_norm": 0.12474401295185089, "learning_rate": 1e-06, "loss": -0.0275, "num_tokens": 245637429.0, "reward": 0.637276828289032, "reward_std": 0.1511622667312622, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3725.0, "completions/mean_length": 758.2020263671875, "completions/mean_terminated_length": 565.1062622070312, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 3.8583090379008746, "grad_norm": 0.1259421408176422, "learning_rate": 1e-06, "loss": -0.0023, "num_tokens": 246195354.0, "reward": 0.7120535969734192, "reward_std": 0.16214017570018768, "rewards/verify_math_reward/mean": 0.7120535969734192, "rewards/verify_math_reward/std": 0.4530588984489441, "step": 413 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3854.0, "completions/mean_length": 782.7678833007812, "completions/mean_terminated_length": 636.0280151367188, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 3.8676384839650146, "grad_norm": 0.1368919163942337, "learning_rate": 1e-06, "loss": -0.0193, "num_tokens": 246836738.0, "reward": 0.6328125, "reward_std": 0.1842258721590042, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3937.0, "completions/mean_length": 812.1361694335938, "completions/mean_terminated_length": 563.7766723632812, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 3.8769679300291546, "grad_norm": 0.1541055142879486, "learning_rate": 1e-06, "loss": -0.0267, "num_tokens": 247390324.0, "reward": 0.6707589626312256, "reward_std": 0.1791156381368637, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 415 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4034.0, "completions/mean_length": 1001.8270263671875, "completions/mean_terminated_length": 656.3237915039062, "completions/min_length": 194.0, "completions/min_terminated_length": 194.0, "epoch": 3.8862973760932946, "grad_norm": 0.14875942468643188, "learning_rate": 1e-06, "loss": -0.033, "num_tokens": 248015425.0, "reward": 0.5658482313156128, "reward_std": 0.18051347136497498, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3460.0, "completions/mean_length": 947.64404296875, "completions/mean_terminated_length": 626.22509765625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 3.8956268221574346, "grad_norm": 0.15762212872505188, "learning_rate": 1e-06, "loss": -0.0303, "num_tokens": 248617754.0, "reward": 0.6116071939468384, "reward_std": 0.19588413834571838, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 417 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3648.0, "completions/mean_length": 772.6361694335938, "completions/mean_terminated_length": 592.7835083007812, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 3.904956268221574, "grad_norm": 0.14572742581367493, "learning_rate": 1e-06, "loss": -0.0137, "num_tokens": 249213132.0, "reward": 0.6383928656578064, "reward_std": 0.1770893931388855, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3839.0, "completions/mean_length": 958.3995971679688, "completions/mean_terminated_length": 655.0086059570312, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 3.914285714285714, "grad_norm": 0.14375333487987518, "learning_rate": 1e-06, "loss": -0.043, "num_tokens": 249833482.0, "reward": 0.6116071939468384, "reward_std": 0.1910271793603897, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.4876568913459778, "step": 419 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2972.0, "completions/mean_length": 814.8326416015625, "completions/mean_terminated_length": 575.1305541992188, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 3.923615160349854, "grad_norm": 0.1448078751564026, "learning_rate": 1e-06, "loss": -0.0308, "num_tokens": 250401628.0, "reward": 0.6462053656578064, "reward_std": 0.1954641044139862, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3747.0, "completions/mean_length": 934.40966796875, "completions/mean_terminated_length": 653.9769287109375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 3.932944606413994, "grad_norm": 0.1312483698129654, "learning_rate": 1e-06, "loss": -0.0277, "num_tokens": 251033115.0, "reward": 0.5959821939468384, "reward_std": 0.17070813477039337, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2844.0, "completions/mean_length": 938.62841796875, "completions/mean_terminated_length": 624.8282470703125, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 3.942274052478134, "grad_norm": 0.13763560354709625, "learning_rate": 1e-06, "loss": -0.024, "num_tokens": 251628358.0, "reward": 0.598214328289032, "reward_std": 0.17468014359474182, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053290486335754, "step": 422 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3104.0, "completions/mean_length": 877.888427734375, "completions/mean_terminated_length": 634.5017700195312, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 3.951603498542274, "grad_norm": 0.1306796371936798, "learning_rate": 1e-06, "loss": -0.0224, "num_tokens": 252248058.0, "reward": 0.590401828289032, "reward_std": 0.1441427320241928, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3405.0, "completions/mean_length": 783.1819458007812, "completions/mean_terminated_length": 566.5291137695312, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 3.960932944606414, "grad_norm": 0.14820237457752228, "learning_rate": 1e-06, "loss": -0.0039, "num_tokens": 252807021.0, "reward": 0.6752232313156128, "reward_std": 0.17731650173664093, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2686.0, "completions/mean_length": 813.3392944335938, "completions/mean_terminated_length": 647.8593139648438, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 3.970262390670554, "grad_norm": 0.1265283226966858, "learning_rate": 1e-06, "loss": -0.0025, "num_tokens": 253448237.0, "reward": 0.6383928656578064, "reward_std": 0.14789676666259766, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 425 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3163.0, "completions/mean_length": 817.4107666015625, "completions/mean_terminated_length": 552.4342651367188, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 3.979591836734694, "grad_norm": 0.15848904848098755, "learning_rate": 1e-06, "loss": -0.0176, "num_tokens": 253996485.0, "reward": 0.65625, "reward_std": 0.1633433997631073, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3300.0, "completions/mean_length": 928.4308471679688, "completions/mean_terminated_length": 605.0504150390625, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 3.9889212827988336, "grad_norm": 0.14289753139019012, "learning_rate": 1e-06, "loss": -0.0418, "num_tokens": 254578759.0, "reward": 0.6584821939468384, "reward_std": 0.18231727182865143, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 427 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03693181818181823, "completions/max_length": 4096.0, "completions/max_terminated_length": 3822.0, "completions/mean_length": 729.0454711914062, "completions/mean_terminated_length": 599.92919921875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 3.9982507288629736, "grad_norm": 0.14556092023849487, "learning_rate": 1e-06, "loss": -0.0337, "num_tokens": 255159623.0, "reward": 0.6674107313156128, "reward_std": 0.18644899129867554, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2074.0, "completions/mean_length": 853.6361694335938, "completions/mean_terminated_length": 574.5963745117188, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 4.0093294460641395, "grad_norm": 0.15588702261447906, "learning_rate": 1e-06, "loss": -0.0277, "num_tokens": 255727761.0, "reward": 0.6517857313156128, "reward_std": 0.17585016787052155, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2577.0, "completions/mean_length": 804.4207763671875, "completions/mean_terminated_length": 638.4912109375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 4.01865889212828, "grad_norm": 0.1348199099302292, "learning_rate": 1e-06, "loss": -0.0125, "num_tokens": 256357130.0, "reward": 0.609375, "reward_std": 0.16781283915042877, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 430 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3073.0, "completions/mean_length": 843.4207763671875, "completions/mean_terminated_length": 622.4469604492188, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 4.0279883381924195, "grad_norm": 0.14399297535419464, "learning_rate": 1e-06, "loss": -0.0185, "num_tokens": 256957755.0, "reward": 0.6383928656578064, "reward_std": 0.17336954176425934, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 431 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3801.0, "completions/mean_length": 859.8326416015625, "completions/mean_terminated_length": 656.3724975585938, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 4.03731778425656, "grad_norm": 0.1406911313533783, "learning_rate": 1e-06, "loss": -0.0198, "num_tokens": 257597061.0, "reward": 0.6506696939468384, "reward_std": 0.20203858613967896, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3694.0, "completions/mean_length": 870.8538208007812, "completions/mean_terminated_length": 664.0154418945312, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 4.0466472303206995, "grad_norm": 0.12592561542987823, "learning_rate": 1e-06, "loss": -0.015, "num_tokens": 258241218.0, "reward": 0.6049107313156128, "reward_std": 0.16330133378505707, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4062.0, "completions/mean_length": 970.9531860351562, "completions/mean_terminated_length": 639.1580810546875, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 4.05597667638484, "grad_norm": 0.1565476655960083, "learning_rate": 1e-06, "loss": -0.0586, "num_tokens": 258858808.0, "reward": 0.5926339626312256, "reward_std": 0.24100124835968018, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 739.1752319335938, "completions/mean_terminated_length": 553.3439331054688, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 4.0653061224489795, "grad_norm": 0.15354785323143005, "learning_rate": 1e-06, "loss": -0.0075, "num_tokens": 259412597.0, "reward": 0.6674107313156128, "reward_std": 0.16600088775157928, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 435 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3878.0, "completions/mean_length": 891.4464721679688, "completions/mean_terminated_length": 611.4368896484375, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 4.07463556851312, "grad_norm": 0.13570523262023926, "learning_rate": 1e-06, "loss": -0.0166, "num_tokens": 260006725.0, "reward": 0.6584821939468384, "reward_std": 0.17048059403896332, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3185.0, "completions/mean_length": 911.8248291015625, "completions/mean_terminated_length": 612.4578857421875, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 4.0839650145772595, "grad_norm": 0.1456775665283203, "learning_rate": 1e-06, "loss": -0.0405, "num_tokens": 260597984.0, "reward": 0.5703125, "reward_std": 0.1639414280653, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3499.0, "completions/mean_length": 956.8460083007812, "completions/mean_terminated_length": 682.5509643554688, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 4.093294460641399, "grad_norm": 0.1299942433834076, "learning_rate": 1e-06, "loss": -0.0383, "num_tokens": 261241614.0, "reward": 0.5881696939468384, "reward_std": 0.18749207258224487, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924396276473999, "step": 438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3715.0, "completions/mean_length": 933.3694458007812, "completions/mean_terminated_length": 652.844482421875, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 4.1026239067055394, "grad_norm": 0.143808975815773, "learning_rate": 1e-06, "loss": -0.0022, "num_tokens": 261869505.0, "reward": 0.5758928656578064, "reward_std": 0.17822733521461487, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2522.0, "completions/mean_length": 869.5736694335938, "completions/mean_terminated_length": 678.8865356445312, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 4.111953352769679, "grad_norm": 0.12110484391450882, "learning_rate": 1e-06, "loss": -0.0054, "num_tokens": 262525931.0, "reward": 0.5580357313156128, "reward_std": 0.17404848337173462, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3422.0, "completions/mean_length": 889.1685791015625, "completions/mean_terminated_length": 659.0131225585938, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 4.121282798833819, "grad_norm": 0.12871553003787994, "learning_rate": 1e-06, "loss": -0.0113, "num_tokens": 263160586.0, "reward": 0.6383928656578064, "reward_std": 0.17175164818763733, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3879.0, "completions/mean_length": 875.021240234375, "completions/mean_terminated_length": 664.3745727539062, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 4.130612244897959, "grad_norm": 0.14722591638565063, "learning_rate": 1e-06, "loss": -0.0157, "num_tokens": 263809621.0, "reward": 0.6071428656578064, "reward_std": 0.18385820090770721, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3830.0, "completions/mean_length": 875.8939819335938, "completions/mean_terminated_length": 624.0204467773438, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 4.139941690962099, "grad_norm": 0.12880070507526398, "learning_rate": 1e-06, "loss": -0.0562, "num_tokens": 264426542.0, "reward": 0.6015625, "reward_std": 0.1623249500989914, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2907.0, "completions/mean_length": 849.6964721679688, "completions/mean_terminated_length": 599.9807739257812, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 4.149271137026239, "grad_norm": 0.1354241520166397, "learning_rate": 1e-06, "loss": -0.0183, "num_tokens": 265022830.0, "reward": 0.645089328289032, "reward_std": 0.15887397527694702, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3499.0, "completions/mean_length": 862.200927734375, "completions/mean_terminated_length": 600.8444213867188, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 4.158600583090379, "grad_norm": 0.14667780697345734, "learning_rate": 1e-06, "loss": -0.0144, "num_tokens": 265611938.0, "reward": 0.6662946939468384, "reward_std": 0.17171911895275116, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3973.0, "completions/mean_length": 830.2857666015625, "completions/mean_terminated_length": 633.1834106445312, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 4.167930029154519, "grad_norm": 0.1396162509918213, "learning_rate": 1e-06, "loss": -0.0254, "num_tokens": 266233002.0, "reward": 0.6741071939468384, "reward_std": 0.18539658188819885, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3792.0, "completions/mean_length": 939.3381958007812, "completions/mean_terminated_length": 638.3362426757812, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 4.1772594752186585, "grad_norm": 0.13883663713932037, "learning_rate": 1e-06, "loss": -0.021, "num_tokens": 266845569.0, "reward": 0.6194196939468384, "reward_std": 0.1640167087316513, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2600.0, "completions/mean_length": 847.0703735351562, "completions/mean_terminated_length": 613.8934936523438, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 4.186588921282799, "grad_norm": 0.1400126814842224, "learning_rate": 1e-06, "loss": -0.0264, "num_tokens": 267454888.0, "reward": 0.6383928656578064, "reward_std": 0.17600379884243011, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3421.0, "completions/mean_length": 908.8516235351562, "completions/mean_terminated_length": 621.9306640625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 4.1959183673469385, "grad_norm": 0.13154536485671997, "learning_rate": 1e-06, "loss": -0.0478, "num_tokens": 268055235.0, "reward": 0.6205357313156128, "reward_std": 0.17299722135066986, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 870.099365234375, "completions/mean_terminated_length": 600.94921875, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 4.205247813411079, "grad_norm": 0.15522713959217072, "learning_rate": 1e-06, "loss": -0.0086, "num_tokens": 268640556.0, "reward": 0.6428571939468384, "reward_std": 0.18170854449272156, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 450 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3886.0, "completions/mean_length": 818.6127319335938, "completions/mean_terminated_length": 657.4296875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 4.214577259475218, "grad_norm": 0.16115118563175201, "learning_rate": 1e-06, "loss": -0.0335, "num_tokens": 269293521.0, "reward": 0.6662946939468384, "reward_std": 0.17862850427627563, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3746.0, "completions/mean_length": 829.3225708007812, "completions/mean_terminated_length": 599.054931640625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 4.223906705539359, "grad_norm": 0.126956507563591, "learning_rate": 1e-06, "loss": -0.0266, "num_tokens": 269881898.0, "reward": 0.613839328289032, "reward_std": 0.1487216353416443, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3857.0, "completions/mean_length": 916.2991333007812, "completions/mean_terminated_length": 688.0908813476562, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 4.233236151603498, "grad_norm": 0.15128618478775024, "learning_rate": 1e-06, "loss": -0.0144, "num_tokens": 270551022.0, "reward": 0.5915178656578064, "reward_std": 0.2105737328529358, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2793.0, "completions/mean_length": 851.7913208007812, "completions/mean_terminated_length": 627.2518310546875, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 4.242565597667639, "grad_norm": 0.1338435858488083, "learning_rate": 1e-06, "loss": -0.0384, "num_tokens": 271173427.0, "reward": 0.6462053656578064, "reward_std": 0.18414919078350067, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3159.0, "completions/mean_length": 858.3795166015625, "completions/mean_terminated_length": 642.5381469726562, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 4.251895043731778, "grad_norm": 0.14865481853485107, "learning_rate": 1e-06, "loss": -0.0194, "num_tokens": 271798143.0, "reward": 0.6618303656578064, "reward_std": 0.18746885657310486, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3802.0, "completions/mean_length": 878.0256958007812, "completions/mean_terminated_length": 613.74755859375, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 4.261224489795918, "grad_norm": 0.12724542617797852, "learning_rate": 1e-06, "loss": -0.0332, "num_tokens": 272391158.0, "reward": 0.6462053656578064, "reward_std": 0.1444704234600067, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3373.0, "completions/mean_length": 897.0435791015625, "completions/mean_terminated_length": 630.1414794921875, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 4.270553935860058, "grad_norm": 0.13616903126239777, "learning_rate": 1e-06, "loss": -0.0444, "num_tokens": 272997917.0, "reward": 0.6104910969734192, "reward_std": 0.17732398211956024, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791128396987915, "step": 457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 897.5045166015625, "completions/mean_terminated_length": 663.8419189453125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 4.279883381924198, "grad_norm": 0.16025783121585846, "learning_rate": 1e-06, "loss": 0.0052, "num_tokens": 273637897.0, "reward": 0.6026785969734192, "reward_std": 0.17649208009243011, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 910.2344360351562, "completions/mean_terminated_length": 640.2542724609375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 4.289212827988338, "grad_norm": 0.13533426821231842, "learning_rate": 1e-06, "loss": -0.0204, "num_tokens": 274255523.0, "reward": 0.6272321939468384, "reward_std": 0.15349668264389038, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3380.0, "completions/mean_length": 970.6027221679688, "completions/mean_terminated_length": 685.0913696289062, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 4.298542274052478, "grad_norm": 0.14569327235221863, "learning_rate": 1e-06, "loss": -0.0348, "num_tokens": 274918207.0, "reward": 0.5837053656578064, "reward_std": 0.20147305727005005, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3801.0, "completions/mean_length": 906.9654541015625, "completions/mean_terminated_length": 673.9940185546875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 4.307871720116618, "grad_norm": 0.16724221408367157, "learning_rate": 1e-06, "loss": -0.0306, "num_tokens": 275572344.0, "reward": 0.5892857313156128, "reward_std": 0.20227426290512085, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3826.0, "completions/mean_length": 835.8058471679688, "completions/mean_terminated_length": 593.4412841796875, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 4.317201166180758, "grad_norm": 0.12606185674667358, "learning_rate": 1e-06, "loss": -0.0281, "num_tokens": 276161930.0, "reward": 0.6808035969734192, "reward_std": 0.12569020688533783, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.4664256274700165, "step": 462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3984.0, "completions/mean_length": 906.0625610351562, "completions/mean_terminated_length": 601.8875732421875, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 4.326530612244898, "grad_norm": 0.1581316590309143, "learning_rate": 1e-06, "loss": -0.0299, "num_tokens": 276744802.0, "reward": 0.5870535969734192, "reward_std": 0.17735928297042847, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 463 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3549.0, "completions/mean_length": 858.4408569335938, "completions/mean_terminated_length": 630.2257690429688, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 4.335860058309038, "grad_norm": 0.12013786286115646, "learning_rate": 1e-06, "loss": -0.0447, "num_tokens": 277359117.0, "reward": 0.6629464626312256, "reward_std": 0.15521912276744843, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3948.0, "completions/mean_length": 772.7299194335938, "completions/mean_terminated_length": 576.3191528320312, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 4.345189504373177, "grad_norm": 0.13454978168010712, "learning_rate": 1e-06, "loss": -0.0133, "num_tokens": 277933627.0, "reward": 0.6941964626312256, "reward_std": 0.1457599252462387, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.46100425720214844, "step": 465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3881.0, "completions/mean_length": 824.3705444335938, "completions/mean_terminated_length": 610.411376953125, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 4.354518950437318, "grad_norm": 0.13409213721752167, "learning_rate": 1e-06, "loss": -0.0177, "num_tokens": 278534631.0, "reward": 0.7042410969734192, "reward_std": 0.15631456673145294, "rewards/verify_math_reward/mean": 0.7042410969734192, "rewards/verify_math_reward/std": 0.45663803815841675, "step": 466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3801.0, "completions/mean_length": 825.2745971679688, "completions/mean_terminated_length": 586.3353271484375, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 4.363848396501457, "grad_norm": 0.11728842556476593, "learning_rate": 1e-06, "loss": -0.0349, "num_tokens": 279111685.0, "reward": 0.6272321939468384, "reward_std": 0.13947828114032745, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111400604248, "step": 467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4033.0, "completions/mean_length": 842.5625610351562, "completions/mean_terminated_length": 621.5303955078125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 4.373177842565598, "grad_norm": 0.12964990735054016, "learning_rate": 1e-06, "loss": -0.0382, "num_tokens": 279718773.0, "reward": 0.6584821939468384, "reward_std": 0.14628027379512787, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3740.0, "completions/mean_length": 862.4855346679688, "completions/mean_terminated_length": 651.01904296875, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 4.382507288629737, "grad_norm": 0.1491265892982483, "learning_rate": 1e-06, "loss": -0.033, "num_tokens": 280356200.0, "reward": 0.6049107313156128, "reward_std": 0.18385820090770721, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 469 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3143.0, "completions/mean_length": 939.2310791015625, "completions/mean_terminated_length": 667.5575561523438, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 4.391836734693878, "grad_norm": 0.15070927143096924, "learning_rate": 1e-06, "loss": -0.0101, "num_tokens": 281002951.0, "reward": 0.5625, "reward_std": 0.17544110119342804, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 470 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4080.0, "completions/mean_length": 888.2835083007812, "completions/mean_terminated_length": 653.9473266601562, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 4.401166180758017, "grad_norm": 0.13083180785179138, "learning_rate": 1e-06, "loss": -0.0109, "num_tokens": 281646629.0, "reward": 0.6116071939468384, "reward_std": 0.16506867110729218, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 471 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 801.9420166015625, "completions/mean_terminated_length": 615.48583984375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 4.410495626822158, "grad_norm": 0.13732460141181946, "learning_rate": 1e-06, "loss": -0.0055, "num_tokens": 282260009.0, "reward": 0.6361607313156128, "reward_std": 0.16326673328876495, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2586.0, "completions/mean_length": 903.263427734375, "completions/mean_terminated_length": 594.541015625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 4.419825072886297, "grad_norm": 0.16347962617874146, "learning_rate": 1e-06, "loss": -0.0228, "num_tokens": 282837573.0, "reward": 0.621651828289032, "reward_std": 0.20421750843524933, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3732.0, "completions/mean_length": 858.0502319335938, "completions/mean_terminated_length": 638.0703125, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 4.429154518950437, "grad_norm": 0.14548324048519135, "learning_rate": 1e-06, "loss": -0.0393, "num_tokens": 283463882.0, "reward": 0.6316964626312256, "reward_std": 0.19415000081062317, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3960.0, "completions/mean_length": 982.5480346679688, "completions/mean_terminated_length": 656.2305908203125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 4.438483965014577, "grad_norm": 0.12172096967697144, "learning_rate": 1e-06, "loss": -0.0221, "num_tokens": 284087333.0, "reward": 0.5613839626312256, "reward_std": 0.1447400450706482, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 793.3761596679688, "completions/mean_terminated_length": 602.315185546875, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 4.447813411078717, "grad_norm": 0.14543366432189941, "learning_rate": 1e-06, "loss": -0.0257, "num_tokens": 284686502.0, "reward": 0.6741071939468384, "reward_std": 0.16476556658744812, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692258834839, "step": 476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2731.0, "completions/mean_length": 817.685302734375, "completions/mean_terminated_length": 590.7852172851562, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 4.457142857142857, "grad_norm": 0.15591642260551453, "learning_rate": 1e-06, "loss": -0.0229, "num_tokens": 285267420.0, "reward": 0.6662946939468384, "reward_std": 0.18889102339744568, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3399.0, "completions/mean_length": 852.4230346679688, "completions/mean_terminated_length": 660.7222290039062, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 4.466472303206997, "grad_norm": 0.13825039565563202, "learning_rate": 1e-06, "loss": -0.0378, "num_tokens": 285913887.0, "reward": 0.637276828289032, "reward_std": 0.18907366693019867, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 954.0100708007812, "completions/mean_terminated_length": 607.4956665039062, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 4.475801749271137, "grad_norm": 0.1647224873304367, "learning_rate": 1e-06, "loss": -0.0647, "num_tokens": 286494920.0, "reward": 0.6618303656578064, "reward_std": 0.19554077088832855, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 479 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4022.0, "completions/mean_length": 981.6897583007812, "completions/mean_terminated_length": 646.7762451171875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 4.485131195335277, "grad_norm": 0.13880734145641327, "learning_rate": 1e-06, "loss": -0.0264, "num_tokens": 287111986.0, "reward": 0.5848214626312256, "reward_std": 0.16450344026088715, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3929.0, "completions/mean_length": 876.583740234375, "completions/mean_terminated_length": 569.5978393554688, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 4.494460641399417, "grad_norm": 0.14553415775299072, "learning_rate": 1e-06, "loss": -0.0336, "num_tokens": 287664693.0, "reward": 0.6049107313156128, "reward_std": 0.16404810547828674, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3793.0, "completions/mean_length": 947.739990234375, "completions/mean_terminated_length": 617.7743530273438, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 4.503790087463557, "grad_norm": 0.13704046607017517, "learning_rate": 1e-06, "loss": -0.029, "num_tokens": 288257948.0, "reward": 0.6261160969734192, "reward_std": 0.15980690717697144, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2332.0, "completions/mean_length": 817.3672485351562, "completions/mean_terminated_length": 548.1074829101562, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 4.513119533527696, "grad_norm": 0.13461345434188843, "learning_rate": 1e-06, "loss": -0.0355, "num_tokens": 288791949.0, "reward": 0.6852678656578064, "reward_std": 0.14098599553108215, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3147.0, "completions/mean_length": 880.9676513671875, "completions/mean_terminated_length": 658.447509765625, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 4.522448979591837, "grad_norm": 0.13937199115753174, "learning_rate": 1e-06, "loss": -0.0133, "num_tokens": 289426024.0, "reward": 0.6049107313156128, "reward_std": 0.16619662940502167, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3191.0, "completions/mean_length": 985.5792846679688, "completions/mean_terminated_length": 655.3370361328125, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 4.531778425655976, "grad_norm": 0.14126333594322205, "learning_rate": 1e-06, "loss": -0.0281, "num_tokens": 290049727.0, "reward": 0.5647321939468384, "reward_std": 0.14748378098011017, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2850.0, "completions/mean_length": 743.3895263671875, "completions/mean_terminated_length": 607.1044921875, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 4.541107871720117, "grad_norm": 0.13064506649971008, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 290650796.0, "reward": 0.6752232313156128, "reward_std": 0.13805679976940155, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3197.0, "completions/mean_length": 880.872802734375, "completions/mean_terminated_length": 654.2389526367188, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 4.550437317784256, "grad_norm": 0.13167732954025269, "learning_rate": 1e-06, "loss": -0.037, "num_tokens": 291284402.0, "reward": 0.6495535969734192, "reward_std": 0.16728220880031586, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 811.8381958007812, "completions/mean_terminated_length": 580.3380737304688, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 4.559766763848397, "grad_norm": 0.15319648385047913, "learning_rate": 1e-06, "loss": -0.0087, "num_tokens": 291856041.0, "reward": 0.7053571939468384, "reward_std": 0.17818161845207214, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613667368888855, "step": 488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4078.0, "completions/mean_length": 1012.21435546875, "completions/mean_terminated_length": 629.1593627929688, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 4.569096209912536, "grad_norm": 0.142557293176651, "learning_rate": 1e-06, "loss": -0.0411, "num_tokens": 292450921.0, "reward": 0.6171875, "reward_std": 0.17205290496349335, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2693.0, "completions/mean_length": 946.9230346679688, "completions/mean_terminated_length": 650.85595703125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 4.578425655976677, "grad_norm": 0.14201919734477997, "learning_rate": 1e-06, "loss": -0.04, "num_tokens": 293081524.0, "reward": 0.5736607313156128, "reward_std": 0.19776137173175812, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 490 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2370.0, "completions/mean_length": 807.294677734375, "completions/mean_terminated_length": 604.6730346679688, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 4.587755102040816, "grad_norm": 0.13329870998859406, "learning_rate": 1e-06, "loss": -0.0107, "num_tokens": 293677708.0, "reward": 0.6729910969734192, "reward_std": 0.1462477743625641, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 491 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3301.0, "completions/mean_length": 820.8638916015625, "completions/mean_terminated_length": 619.0782470703125, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 4.597084548104956, "grad_norm": 0.1489163190126419, "learning_rate": 1e-06, "loss": -0.022, "num_tokens": 294288514.0, "reward": 0.6428571939468384, "reward_std": 0.19122150540351868, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.47942501306533813, "step": 492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2537.0, "completions/mean_length": 900.7120971679688, "completions/mean_terminated_length": 608.8161010742188, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 4.606413994169096, "grad_norm": 0.15610577166080475, "learning_rate": 1e-06, "loss": -0.0386, "num_tokens": 294874144.0, "reward": 0.582589328289032, "reward_std": 0.1680738478899002, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.493407279253006, "step": 493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4082.0, "completions/mean_length": 853.8560791015625, "completions/mean_terminated_length": 625.3178100585938, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 4.615743440233236, "grad_norm": 0.13953043520450592, "learning_rate": 1e-06, "loss": -0.0571, "num_tokens": 295476919.0, "reward": 0.6718750596046448, "reward_std": 0.18077494204044342, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4072.0, "completions/mean_length": 905.6261596679688, "completions/mean_terminated_length": 592.8443603515625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 4.625072886297376, "grad_norm": 0.14395247399806976, "learning_rate": 1e-06, "loss": -0.0533, "num_tokens": 296046312.0, "reward": 0.6729910969734192, "reward_std": 0.17510268092155457, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2773.0, "completions/mean_length": 977.607177734375, "completions/mean_terminated_length": 616.4483032226562, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 4.634402332361516, "grad_norm": 0.1408366560935974, "learning_rate": 1e-06, "loss": -0.0168, "num_tokens": 296633080.0, "reward": 0.5948660969734192, "reward_std": 0.17592641711235046, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 715.794677734375, "completions/mean_terminated_length": 594.654296875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 4.643731778425656, "grad_norm": 0.12047068029642105, "learning_rate": 1e-06, "loss": -0.0206, "num_tokens": 297230288.0, "reward": 0.6897321939468384, "reward_std": 0.1327543556690216, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.462861567735672, "step": 497 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3825.0, "completions/mean_length": 962.5569458007812, "completions/mean_terminated_length": 638.4076538085938, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 4.653061224489796, "grad_norm": 0.13730822503566742, "learning_rate": 1e-06, "loss": -0.0552, "num_tokens": 297838211.0, "reward": 0.6495535969734192, "reward_std": 0.1579635739326477, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 498 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3474.0, "completions/mean_length": 886.5357666015625, "completions/mean_terminated_length": 610.3272705078125, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 4.662390670553936, "grad_norm": 0.15910038352012634, "learning_rate": 1e-06, "loss": -0.0318, "num_tokens": 298432763.0, "reward": 0.6785714626312256, "reward_std": 0.19554010033607483, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 845.0078735351562, "completions/mean_terminated_length": 632.3983154296875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 4.671720116618076, "grad_norm": 0.14126543700695038, "learning_rate": 1e-06, "loss": -0.0309, "num_tokens": 299055754.0, "reward": 0.6205357313156128, "reward_std": 0.17494861781597137, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2202.0, "completions/mean_length": 889.982177734375, "completions/mean_terminated_length": 635.0457763671875, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 4.681049562682215, "grad_norm": 0.1432536244392395, "learning_rate": 1e-06, "loss": -0.0299, "num_tokens": 299678042.0, "reward": 0.6171875, "reward_std": 0.1544705480337143, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 501 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3824.0, "completions/mean_length": 939.3839721679688, "completions/mean_terminated_length": 659.3925170898438, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 4.690379008746356, "grad_norm": 0.14592066407203674, "learning_rate": 1e-06, "loss": -0.034, "num_tokens": 300319298.0, "reward": 0.5703125, "reward_std": 0.17999425530433655, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3352.0, "completions/mean_length": 951.3292846679688, "completions/mean_terminated_length": 600.1873168945312, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 4.699708454810495, "grad_norm": 0.15302255749702454, "learning_rate": 1e-06, "loss": -0.048, "num_tokens": 300897329.0, "reward": 0.6037946939468384, "reward_std": 0.19294606149196625, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2738.0, "completions/mean_length": 968.7600708007812, "completions/mean_terminated_length": 662.1679077148438, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 4.709037900874636, "grad_norm": 0.13722142577171326, "learning_rate": 1e-06, "loss": -0.0316, "num_tokens": 301529250.0, "reward": 0.6261160969734192, "reward_std": 0.15699605643749237, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3700.0, "completions/mean_length": 903.2154541015625, "completions/mean_terminated_length": 641.0060424804688, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 4.718367346938775, "grad_norm": 0.1396508514881134, "learning_rate": 1e-06, "loss": -0.0216, "num_tokens": 302154771.0, "reward": 0.6004464626312256, "reward_std": 0.16686920821666718, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2350.0, "completions/mean_length": 935.7332763671875, "completions/mean_terminated_length": 638.6141967773438, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 4.727696793002916, "grad_norm": 0.14874345064163208, "learning_rate": 1e-06, "loss": -0.0444, "num_tokens": 302767308.0, "reward": 0.5881696939468384, "reward_std": 0.15905873477458954, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3552.0, "completions/mean_length": 817.3080444335938, "completions/mean_terminated_length": 594.5601806640625, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 4.737026239067055, "grad_norm": 0.14757046103477478, "learning_rate": 1e-06, "loss": -0.0262, "num_tokens": 303364256.0, "reward": 0.6651785969734192, "reward_std": 0.1788567453622818, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219157218933105, "step": 507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4059.0, "completions/mean_length": 994.0245971679688, "completions/mean_terminated_length": 681.54052734375, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 4.746355685131196, "grad_norm": 0.13975664973258972, "learning_rate": 1e-06, "loss": -0.0403, "num_tokens": 304013742.0, "reward": 0.5602678656578064, "reward_std": 0.1728488951921463, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3845.0, "completions/mean_length": 857.404052734375, "completions/mean_terminated_length": 608.28125, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 4.755685131195335, "grad_norm": 0.14102959632873535, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 304613456.0, "reward": 0.6584821939468384, "reward_std": 0.14094644784927368, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 509 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3494.0, "completions/mean_length": 815.8225708007812, "completions/mean_terminated_length": 621.9586181640625, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 4.765014577259475, "grad_norm": 0.12711749970912933, "learning_rate": 1e-06, "loss": -0.039, "num_tokens": 305224233.0, "reward": 0.723214328289032, "reward_std": 0.16386404633522034, "rewards/verify_math_reward/mean": 0.7232142686843872, "rewards/verify_math_reward/std": 0.44765952229499817, "step": 510 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3817.0, "completions/mean_length": 955.30810546875, "completions/mean_terminated_length": 660.0293579101562, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 4.774344023323615, "grad_norm": 0.14884766936302185, "learning_rate": 1e-06, "loss": -0.0012, "num_tokens": 305867781.0, "reward": 0.6082589626312256, "reward_std": 0.19392429292201996, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841193318367004, "step": 511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3730.0, "completions/mean_length": 818.5111694335938, "completions/mean_terminated_length": 570.6338500976562, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 4.783673469387755, "grad_norm": 0.13729892671108246, "learning_rate": 1e-06, "loss": -0.02, "num_tokens": 306428167.0, "reward": 0.6718750596046448, "reward_std": 0.1368863582611084, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3931.0, "completions/mean_length": 859.0357666015625, "completions/mean_terminated_length": 626.7176513671875, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 4.793002915451895, "grad_norm": 0.13870447874069214, "learning_rate": 1e-06, "loss": -0.0396, "num_tokens": 307044415.0, "reward": 0.5970982313156128, "reward_std": 0.16848501563072205, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.4907552897930145, "step": 513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3639.0, "completions/mean_length": 966.08935546875, "completions/mean_terminated_length": 620.9071044921875, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 4.802332361516035, "grad_norm": 0.15466636419296265, "learning_rate": 1e-06, "loss": -0.0106, "num_tokens": 307626375.0, "reward": 0.6261160969734192, "reward_std": 0.1811119168996811, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 514 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3488.0, "completions/mean_length": 966.2667846679688, "completions/mean_terminated_length": 650.9864501953125, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 4.811661807580175, "grad_norm": 0.15332716703414917, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 308253054.0, "reward": 0.582589328289032, "reward_std": 0.176600843667984, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.493407279253006, "step": 515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3642.0, "completions/mean_length": 978.966552734375, "completions/mean_terminated_length": 639.4876098632812, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 4.820991253644315, "grad_norm": 0.1658165603876114, "learning_rate": 1e-06, "loss": -0.0435, "num_tokens": 308863856.0, "reward": 0.5714285969734192, "reward_std": 0.2142515331506729, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 874.7545166015625, "completions/mean_terminated_length": 643.5645751953125, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 4.830320699708455, "grad_norm": 0.14621497690677643, "learning_rate": 1e-06, "loss": -0.0209, "num_tokens": 309490748.0, "reward": 0.6082589626312256, "reward_std": 0.20308955013751984, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841196298599243, "step": 517 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3166.0, "completions/mean_length": 932.0703735351562, "completions/mean_terminated_length": 630.3753051757812, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 4.839650145772595, "grad_norm": 0.16321462392807007, "learning_rate": 1e-06, "loss": -0.0138, "num_tokens": 310088707.0, "reward": 0.6361607313156128, "reward_std": 0.14102061092853546, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3853.0, "completions/mean_length": 983.9944458007812, "completions/mean_terminated_length": 636.5, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 4.848979591836734, "grad_norm": 0.1474456787109375, "learning_rate": 1e-06, "loss": -0.0406, "num_tokens": 310684830.0, "reward": 0.6116071939468384, "reward_std": 0.14394910633563995, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3515.0, "completions/mean_length": 954.6317138671875, "completions/mean_terminated_length": 655.0880737304688, "completions/min_length": 189.0, "completions/min_terminated_length": 189.0, "epoch": 4.858309037900875, "grad_norm": 0.12145557254552841, "learning_rate": 1e-06, "loss": -0.0311, "num_tokens": 311308524.0, "reward": 0.5814732313156128, "reward_std": 0.15033601224422455, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3251.0, "completions/mean_length": 856.9922485351562, "completions/mean_terminated_length": 632.8126831054688, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 4.867638483965014, "grad_norm": 0.13548052310943604, "learning_rate": 1e-06, "loss": -0.034, "num_tokens": 311931709.0, "reward": 0.6473214626312256, "reward_std": 0.16326813399791718, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2178.0, "completions/mean_length": 810.9475708007812, "completions/mean_terminated_length": 562.4981689453125, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 4.876967930029155, "grad_norm": 0.15522019565105438, "learning_rate": 1e-06, "loss": -0.0308, "num_tokens": 312485238.0, "reward": 0.6875000596046448, "reward_std": 0.16747654974460602, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3690.0, "completions/mean_length": 821.794677734375, "completions/mean_terminated_length": 628.28369140625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 4.886297376093294, "grad_norm": 0.14525139331817627, "learning_rate": 1e-06, "loss": -0.0123, "num_tokens": 313113166.0, "reward": 0.613839328289032, "reward_std": 0.18080954253673553, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3791.0, "completions/mean_length": 857.7813110351562, "completions/mean_terminated_length": 570.5516357421875, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 4.895626822157435, "grad_norm": 0.13862133026123047, "learning_rate": 1e-06, "loss": -0.0359, "num_tokens": 313674034.0, "reward": 0.6305803656578064, "reward_std": 0.12805670499801636, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.48291724920272827, "step": 524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3119.0, "completions/mean_length": 1012.5089721679688, "completions/mean_terminated_length": 693.527099609375, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 4.904956268221574, "grad_norm": 0.1406036764383316, "learning_rate": 1e-06, "loss": -0.0273, "num_tokens": 314333354.0, "reward": 0.5457589626312256, "reward_std": 0.20298220217227936, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 525 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3999.0, "completions/mean_length": 1040.7913818359375, "completions/mean_terminated_length": 643.9609375, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 4.914285714285715, "grad_norm": 0.1542518138885498, "learning_rate": 1e-06, "loss": -0.0497, "num_tokens": 314935831.0, "reward": 0.6004464626312256, "reward_std": 0.1698743999004364, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2488.0, "completions/mean_length": 903.6641235351562, "completions/mean_terminated_length": 603.5299072265625, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 4.923615160349854, "grad_norm": 0.1344006061553955, "learning_rate": 1e-06, "loss": -0.0452, "num_tokens": 315519586.0, "reward": 0.5736607313156128, "reward_std": 0.15120504796504974, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3122.0, "completions/mean_length": 942.0848388671875, "completions/mean_terminated_length": 620.098388671875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 4.932944606413994, "grad_norm": 0.15205985307693481, "learning_rate": 1e-06, "loss": -0.0474, "num_tokens": 316112758.0, "reward": 0.6350446939468384, "reward_std": 0.13940368592739105, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3746.0, "completions/mean_length": 816.1808471679688, "completions/mean_terminated_length": 609.976318359375, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 4.942274052478134, "grad_norm": 0.14302214980125427, "learning_rate": 1e-06, "loss": -0.0214, "num_tokens": 316707080.0, "reward": 0.6540178656578064, "reward_std": 0.177130326628685, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2597.0, "completions/mean_length": 863.3404541015625, "completions/mean_terminated_length": 614.67431640625, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 4.9516034985422746, "grad_norm": 0.15913861989974976, "learning_rate": 1e-06, "loss": -0.0164, "num_tokens": 317308297.0, "reward": 0.6618303656578064, "reward_std": 0.157324880361557, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 530 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2789.0, "completions/mean_length": 800.0256958007812, "completions/mean_terminated_length": 563.4724731445312, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 4.960932944606414, "grad_norm": 0.12956346571445465, "learning_rate": 1e-06, "loss": -0.0388, "num_tokens": 317863608.0, "reward": 0.7299107313156128, "reward_std": 0.14312425255775452, "rewards/verify_math_reward/mean": 0.7299107313156128, "rewards/verify_math_reward/std": 0.44425368309020996, "step": 531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3079.0, "completions/mean_length": 933.9152221679688, "completions/mean_terminated_length": 606.8029174804688, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 4.970262390670554, "grad_norm": 0.1410096436738968, "learning_rate": 1e-06, "loss": -0.045, "num_tokens": 318436020.0, "reward": 0.6573660969734192, "reward_std": 0.17942015826702118, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485536336898804, "step": 532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2926.0, "completions/mean_length": 890.5357666015625, "completions/mean_terminated_length": 618.88623046875, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 4.979591836734694, "grad_norm": 0.13046663999557495, "learning_rate": 1e-06, "loss": -0.0144, "num_tokens": 319039924.0, "reward": 0.6305803656578064, "reward_std": 0.14714929461479187, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.48291724920272827, "step": 533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3411.0, "completions/mean_length": 894.8928833007812, "completions/mean_terminated_length": 627.8114013671875, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 4.988921282798834, "grad_norm": 0.14756572246551514, "learning_rate": 1e-06, "loss": -0.0265, "num_tokens": 319651332.0, "reward": 0.6004464626312256, "reward_std": 0.1861012876033783, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07670454545454541, "completions/max_length": 4096.0, "completions/max_terminated_length": 1950.0, "completions/mean_length": 822.45458984375, "completions/mean_terminated_length": 550.4984741210938, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 4.998250728862974, "grad_norm": 0.1231074184179306, "learning_rate": 1e-06, "loss": -0.0154, "num_tokens": 320228151.0, "reward": 0.625, "reward_std": 0.11835899204015732, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 535 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3135.0, "completions/mean_length": 858.0636596679688, "completions/mean_terminated_length": 592.1461181640625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 5.0093294460641395, "grad_norm": 0.13598649203777313, "learning_rate": 1e-06, "loss": -0.0272, "num_tokens": 320796040.0, "reward": 0.6863839626312256, "reward_std": 0.13519500195980072, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422144770622253, "step": 536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3349.0, "completions/mean_length": 936.01904296875, "completions/mean_terminated_length": 630.4639282226562, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 5.01865889212828, "grad_norm": 0.13071931898593903, "learning_rate": 1e-06, "loss": -0.0181, "num_tokens": 321392849.0, "reward": 0.5837053656578064, "reward_std": 0.14304685592651367, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 537 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3047.0, "completions/mean_length": 1017.6016235351562, "completions/mean_terminated_length": 661.07470703125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 5.0279883381924195, "grad_norm": 0.14592930674552917, "learning_rate": 1e-06, "loss": -0.0498, "num_tokens": 322014052.0, "reward": 0.5915178656578064, "reward_std": 0.18532103300094604, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2656.0, "completions/mean_length": 851.3750610351562, "completions/mean_terminated_length": 593.3687133789062, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 5.03731778425656, "grad_norm": 0.1583303064107895, "learning_rate": 1e-06, "loss": -0.022, "num_tokens": 322599156.0, "reward": 0.621651828289032, "reward_std": 0.15417632460594177, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4071.0, "completions/mean_length": 847.8739013671875, "completions/mean_terminated_length": 602.21728515625, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 5.0466472303206995, "grad_norm": 0.15164095163345337, "learning_rate": 1e-06, "loss": -0.0335, "num_tokens": 323183915.0, "reward": 0.7008928656578064, "reward_std": 0.1834784299135208, "rewards/verify_math_reward/mean": 0.7008928656578064, "rewards/verify_math_reward/std": 0.4581226110458374, "step": 540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3451.0, "completions/mean_length": 839.5145263671875, "completions/mean_terminated_length": 597.4256591796875, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 5.05597667638484, "grad_norm": 0.1190546452999115, "learning_rate": 1e-06, "loss": -0.0257, "num_tokens": 323765096.0, "reward": 0.6383928656578064, "reward_std": 0.13275323808193207, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4063.0, "completions/mean_length": 836.075927734375, "completions/mean_terminated_length": 622.8822631835938, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 5.0653061224489795, "grad_norm": 0.1438395380973816, "learning_rate": 1e-06, "loss": -0.0128, "num_tokens": 324371476.0, "reward": 0.6484375, "reward_std": 0.1672803908586502, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4044.0, "completions/mean_length": 838.6585083007812, "completions/mean_terminated_length": 596.5059814453125, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 5.07463556851312, "grad_norm": 0.1270408034324646, "learning_rate": 1e-06, "loss": -0.013, "num_tokens": 324956410.0, "reward": 0.6316964626312256, "reward_std": 0.1478532999753952, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3538.0, "completions/mean_length": 867.8594360351562, "completions/mean_terminated_length": 594.2881469726562, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 5.0839650145772595, "grad_norm": 0.13325980305671692, "learning_rate": 1e-06, "loss": -0.0223, "num_tokens": 325540796.0, "reward": 0.6484375, "reward_std": 0.13804681599140167, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3920.0, "completions/mean_length": 973.5379638671875, "completions/mean_terminated_length": 637.747802734375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 5.093294460641399, "grad_norm": 0.12674620747566223, "learning_rate": 1e-06, "loss": -0.0362, "num_tokens": 326155990.0, "reward": 0.543526828289032, "reward_std": 0.14628097414970398, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3766.0, "completions/mean_length": 826.5234985351562, "completions/mean_terminated_length": 566.5409545898438, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 5.1026239067055394, "grad_norm": 0.14388048648834229, "learning_rate": 1e-06, "loss": -0.0136, "num_tokens": 326709187.0, "reward": 0.6651785969734192, "reward_std": 0.14064082503318787, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219160199165344, "step": 546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4052.0, "completions/mean_length": 839.1495971679688, "completions/mean_terminated_length": 605.404296875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 5.111953352769679, "grad_norm": 0.1343618780374527, "learning_rate": 1e-06, "loss": -0.04, "num_tokens": 327305321.0, "reward": 0.676339328289032, "reward_std": 0.1538725346326828, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3394.0, "completions/mean_length": 786.8739013671875, "completions/mean_terminated_length": 574.649658203125, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 5.121282798833819, "grad_norm": 0.14267843961715698, "learning_rate": 1e-06, "loss": -0.0185, "num_tokens": 327871144.0, "reward": 0.7243303656578064, "reward_std": 0.13839450478553772, "rewards/verify_math_reward/mean": 0.7243303656578064, "rewards/verify_math_reward/std": 0.4471006691455841, "step": 548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3624.0, "completions/mean_length": 859.0814819335938, "completions/mean_terminated_length": 643.2869262695312, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 5.130612244897959, "grad_norm": 0.12819762527942657, "learning_rate": 1e-06, "loss": -0.0339, "num_tokens": 328502497.0, "reward": 0.6517857313156128, "reward_std": 0.15105168521404266, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3505.0, "completions/mean_length": 967.5535888671875, "completions/mean_terminated_length": 613.903076171875, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 5.139941690962099, "grad_norm": 0.14108048379421234, "learning_rate": 1e-06, "loss": -0.034, "num_tokens": 329095945.0, "reward": 0.5412946939468384, "reward_std": 0.1646895855665207, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 550 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4018.0, "completions/mean_length": 845.7891235351562, "completions/mean_terminated_length": 578.863525390625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 5.149271137026239, "grad_norm": 0.13589175045490265, "learning_rate": 1e-06, "loss": -0.0216, "num_tokens": 329663580.0, "reward": 0.6540178656578064, "reward_std": 0.12967249751091003, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 551 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3706.0, "completions/mean_length": 881.1920166015625, "completions/mean_terminated_length": 666.8714599609375, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 5.158600583090379, "grad_norm": 0.1442086398601532, "learning_rate": 1e-06, "loss": -0.0559, "num_tokens": 330311032.0, "reward": 0.6696428656578064, "reward_std": 0.18426865339279175, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3465.0, "completions/mean_length": 824.1585083007812, "completions/mean_terminated_length": 601.8760375976562, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 5.167930029154519, "grad_norm": 0.13595373928546906, "learning_rate": 1e-06, "loss": -0.0322, "num_tokens": 330902230.0, "reward": 0.6506696939468384, "reward_std": 0.1429395079612732, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 971.66748046875, "completions/mean_terminated_length": 644.2095947265625, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 5.1772594752186585, "grad_norm": 0.12988536059856415, "learning_rate": 1e-06, "loss": -0.0419, "num_tokens": 331512100.0, "reward": 0.6104910969734192, "reward_std": 0.15266859531402588, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791128396987915, "step": 554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2589.0, "completions/mean_length": 908.65966796875, "completions/mean_terminated_length": 630.1541137695312, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 5.186588921282799, "grad_norm": 0.15173228085041046, "learning_rate": 1e-06, "loss": -0.0365, "num_tokens": 332117251.0, "reward": 0.6696428656578064, "reward_std": 0.1817852407693863, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3508.0, "completions/mean_length": 795.2188110351562, "completions/mean_terminated_length": 596.0, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 5.1959183673469385, "grad_norm": 0.14987683296203613, "learning_rate": 1e-06, "loss": -0.039, "num_tokens": 332711983.0, "reward": 0.6953125596046448, "reward_std": 0.18096107244491577, "rewards/verify_math_reward/mean": 0.6953125, "rewards/verify_math_reward/std": 0.4605320394039154, "step": 556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2917.0, "completions/mean_length": 995.87060546875, "completions/mean_terminated_length": 662.4820556640625, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 5.205247813411079, "grad_norm": 0.14564700424671173, "learning_rate": 1e-06, "loss": -0.0613, "num_tokens": 333336899.0, "reward": 0.6049107313156128, "reward_std": 0.19107064604759216, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2628.0, "completions/mean_length": 945.86279296875, "completions/mean_terminated_length": 594.1104125976562, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 5.214577259475218, "grad_norm": 0.14157523214817047, "learning_rate": 1e-06, "loss": -0.0394, "num_tokens": 333911280.0, "reward": 0.6305803656578064, "reward_std": 0.16686992347240448, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.48291724920272827, "step": 558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3678.0, "completions/mean_length": 975.755615234375, "completions/mean_terminated_length": 623.0322875976562, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 5.223906705539359, "grad_norm": 0.1475507616996765, "learning_rate": 1e-06, "loss": -0.0633, "num_tokens": 334498133.0, "reward": 0.6305803656578064, "reward_std": 0.18366950750350952, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2293.0, "completions/mean_length": 961.2678833007812, "completions/mean_terminated_length": 593.8554077148438, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 5.233236151603498, "grad_norm": 0.1454574018716812, "learning_rate": 1e-06, "loss": -0.0422, "num_tokens": 335064125.0, "reward": 0.5736607313156128, "reward_std": 0.13527238368988037, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3801.0, "completions/mean_length": 919.1116333007812, "completions/mean_terminated_length": 607.6519775390625, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 5.242565597667639, "grad_norm": 0.13434049487113953, "learning_rate": 1e-06, "loss": -0.0304, "num_tokens": 335651097.0, "reward": 0.6651785969734192, "reward_std": 0.14109477400779724, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219160199165344, "step": 561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4050.0, "completions/mean_length": 958.59716796875, "completions/mean_terminated_length": 616.8997192382812, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 5.251895043731778, "grad_norm": 0.14526161551475525, "learning_rate": 1e-06, "loss": -0.0292, "num_tokens": 336234872.0, "reward": 0.613839328289032, "reward_std": 0.15867966413497925, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3176.0, "completions/mean_length": 942.2433471679688, "completions/mean_terminated_length": 611.7015991210938, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 5.261224489795918, "grad_norm": 0.15732093155384064, "learning_rate": 1e-06, "loss": -0.0679, "num_tokens": 336816234.0, "reward": 0.5993303656578064, "reward_std": 0.19666732847690582, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3807.0, "completions/mean_length": 1053.8560791015625, "completions/mean_terminated_length": 667.3698120117188, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 5.270553935860058, "grad_norm": 0.14700715243816376, "learning_rate": 1e-06, "loss": -0.0676, "num_tokens": 337446441.0, "reward": 0.559151828289032, "reward_std": 0.19974806904792786, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3499.0, "completions/mean_length": 947.7522583007812, "completions/mean_terminated_length": 609.1890869140625, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 5.279883381924198, "grad_norm": 0.15658360719680786, "learning_rate": 1e-06, "loss": -0.0453, "num_tokens": 338030283.0, "reward": 0.6116071939468384, "reward_std": 0.1766418069601059, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.4876568913459778, "step": 565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2421.0, "completions/mean_length": 863.8370971679688, "completions/mean_terminated_length": 594.1644897460938, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 5.289212827988338, "grad_norm": 0.15140020847320557, "learning_rate": 1e-06, "loss": -0.0456, "num_tokens": 338607561.0, "reward": 0.6484375, "reward_std": 0.15916889905929565, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2559.0, "completions/mean_length": 953.05810546875, "completions/mean_terminated_length": 619.3629760742188, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 5.298542274052478, "grad_norm": 0.14211858808994293, "learning_rate": 1e-06, "loss": -0.0451, "num_tokens": 339194709.0, "reward": 0.6116071939468384, "reward_std": 0.16044698655605316, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.4876568913459778, "step": 567 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3767.0, "completions/mean_length": 886.1897583007812, "completions/mean_terminated_length": 614.1719360351562, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 5.307871720116618, "grad_norm": 0.14483484625816345, "learning_rate": 1e-06, "loss": -0.0294, "num_tokens": 339788775.0, "reward": 0.6149553656578064, "reward_std": 0.1488385647535324, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3927.0, "completions/mean_length": 993.4576416015625, "completions/mean_terminated_length": 621.1524658203125, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 5.317201166180758, "grad_norm": 0.12855836749076843, "learning_rate": 1e-06, "loss": -0.0427, "num_tokens": 340377953.0, "reward": 0.637276828289032, "reward_std": 0.1392500400543213, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2971.0, "completions/mean_length": 915.40185546875, "completions/mean_terminated_length": 612.1173706054688, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 5.326530612244898, "grad_norm": 0.14327600598335266, "learning_rate": 1e-06, "loss": -0.0175, "num_tokens": 340963633.0, "reward": 0.609375, "reward_std": 0.1397392898797989, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 570 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3925.0, "completions/mean_length": 932.4777221679688, "completions/mean_terminated_length": 622.3284301757812, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 5.335860058309038, "grad_norm": 0.14557726681232452, "learning_rate": 1e-06, "loss": -0.0468, "num_tokens": 341553677.0, "reward": 0.6785714626312256, "reward_std": 0.17315199971199036, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 571 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2654.0, "completions/mean_length": 871.8303833007812, "completions/mean_terminated_length": 602.82470703125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 5.345189504373177, "grad_norm": 0.1359301060438156, "learning_rate": 1e-06, "loss": -0.0419, "num_tokens": 342144901.0, "reward": 0.5959821939468384, "reward_std": 0.157290980219841, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4062.0, "completions/mean_length": 871.1339721679688, "completions/mean_terminated_length": 593.5999755859375, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 5.354518950437318, "grad_norm": 0.1599980890750885, "learning_rate": 1e-06, "loss": -0.0469, "num_tokens": 342729197.0, "reward": 0.6640625, "reward_std": 0.17502851784229279, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 573 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2673.0, "completions/mean_length": 794.0379638671875, "completions/mean_terminated_length": 540.0408935546875, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 5.363848396501457, "grad_norm": 0.14998947083950043, "learning_rate": 1e-06, "loss": -0.0202, "num_tokens": 343269063.0, "reward": 0.6261160969734192, "reward_std": 0.14170026779174805, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3654.0, "completions/mean_length": 961.2656860351562, "completions/mean_terminated_length": 674.9013671875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 5.373177842565598, "grad_norm": 0.13110540807247162, "learning_rate": 1e-06, "loss": -0.0179, "num_tokens": 343915637.0, "reward": 0.6082589626312256, "reward_std": 0.1699492633342743, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841196298599243, "step": 575 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3127.0, "completions/mean_length": 881.825927734375, "completions/mean_terminated_length": 605.2120971679688, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 5.382507288629737, "grad_norm": 0.14458364248275757, "learning_rate": 1e-06, "loss": -0.031, "num_tokens": 344511481.0, "reward": 0.6395089626312256, "reward_std": 0.15030533075332642, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4036.0, "completions/mean_length": 910.7310791015625, "completions/mean_terminated_length": 636.6048583984375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 5.391836734693878, "grad_norm": 0.171955406665802, "learning_rate": 1e-06, "loss": -0.0286, "num_tokens": 345123256.0, "reward": 0.6875000596046448, "reward_std": 0.13508693873882294, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2465.0, "completions/mean_length": 869.3939819335938, "completions/mean_terminated_length": 600.1849975585938, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 5.401166180758017, "grad_norm": 0.13843972980976105, "learning_rate": 1e-06, "loss": -0.0194, "num_tokens": 345715233.0, "reward": 0.6473214626312256, "reward_std": 0.1371905654668808, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807061672210693, "step": 578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3303.0, "completions/mean_length": 992.16748046875, "completions/mean_terminated_length": 628.3765869140625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 5.410495626822158, "grad_norm": 0.18873398005962372, "learning_rate": 1e-06, "loss": -0.0229, "num_tokens": 346313231.0, "reward": 0.5926339626312256, "reward_std": 0.1828792542219162, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3424.0, "completions/mean_length": 856.6808471679688, "completions/mean_terminated_length": 620.0359497070312, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 5.419825072886297, "grad_norm": 0.14334967732429504, "learning_rate": 1e-06, "loss": -0.0068, "num_tokens": 346916697.0, "reward": 0.660714328289032, "reward_std": 0.1626298427581787, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3971.0, "completions/mean_length": 867.802490234375, "completions/mean_terminated_length": 594.2264404296875, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 5.429154518950437, "grad_norm": 0.11689605563879013, "learning_rate": 1e-06, "loss": -0.0325, "num_tokens": 347493752.0, "reward": 0.6283482313156128, "reward_std": 0.10968157649040222, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 581 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 933.2210083007812, "completions/mean_terminated_length": 644.2947998046875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 5.438483965014577, "grad_norm": 0.12936632335186005, "learning_rate": 1e-06, "loss": -0.0147, "num_tokens": 348104246.0, "reward": 0.6640625, "reward_std": 0.1634860783815384, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3923.0, "completions/mean_length": 874.5904541015625, "completions/mean_terminated_length": 597.3539428710938, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 5.447813411078717, "grad_norm": 0.16096006333827972, "learning_rate": 1e-06, "loss": -0.0344, "num_tokens": 348679951.0, "reward": 0.6517857313156128, "reward_std": 0.1748744547367096, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2761.0, "completions/mean_length": 848.8047485351562, "completions/mean_terminated_length": 590.593994140625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 5.457142857142857, "grad_norm": 0.13853366672992706, "learning_rate": 1e-06, "loss": -0.0398, "num_tokens": 349254784.0, "reward": 0.6238839626312256, "reward_std": 0.17081758379936218, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.48468026518821716, "step": 584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2573.0, "completions/mean_length": 974.8248291015625, "completions/mean_terminated_length": 621.9962768554688, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 5.466472303206997, "grad_norm": 0.1280086636543274, "learning_rate": 1e-06, "loss": -0.0426, "num_tokens": 349852459.0, "reward": 0.598214328289032, "reward_std": 0.13921892642974854, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053290486335754, "step": 585 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3792.0, "completions/mean_length": 865.4520263671875, "completions/mean_terminated_length": 591.6767578125, "completions/min_length": 195.0, "completions/min_terminated_length": 195.0, "epoch": 5.475801749271137, "grad_norm": 0.15977877378463745, "learning_rate": 1e-06, "loss": -0.017, "num_tokens": 350428920.0, "reward": 0.6383928656578064, "reward_std": 0.1852443516254425, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2997.0, "completions/mean_length": 929.4063110351562, "completions/mean_terminated_length": 652.713623046875, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 5.485131195335277, "grad_norm": 0.1367771029472351, "learning_rate": 1e-06, "loss": -0.0308, "num_tokens": 351065372.0, "reward": 0.5814732313156128, "reward_std": 0.15942853689193726, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3689.0, "completions/mean_length": 1064.630615234375, "completions/mean_terminated_length": 640.3931274414062, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 5.494460641399417, "grad_norm": 0.14162762463092804, "learning_rate": 1e-06, "loss": -0.027, "num_tokens": 351666393.0, "reward": 0.5959821939468384, "reward_std": 0.15916681289672852, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3185.0, "completions/mean_length": 873.7422485351562, "completions/mean_terminated_length": 609.1123046875, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 5.503790087463557, "grad_norm": 0.14134083688259125, "learning_rate": 1e-06, "loss": -0.0427, "num_tokens": 352256394.0, "reward": 0.6830357313156128, "reward_std": 0.1574750393629074, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 589 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 1970.0, "completions/mean_length": 777.568115234375, "completions/mean_terminated_length": 564.7470703125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 5.513119533527696, "grad_norm": 0.15470553934574127, "learning_rate": 1e-06, "loss": -0.0376, "num_tokens": 352815631.0, "reward": 0.6540178656578064, "reward_std": 0.16101224720478058, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 590 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 985.7645263671875, "completions/mean_terminated_length": 612.5362548828125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 5.522448979591837, "grad_norm": 0.15244746208190918, "learning_rate": 1e-06, "loss": -0.0393, "num_tokens": 353401740.0, "reward": 0.6183035969734192, "reward_std": 0.17235924303531647, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2796.0, "completions/mean_length": 849.8058471679688, "completions/mean_terminated_length": 616.8253173828125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 5.531778425655976, "grad_norm": 0.13224495947360992, "learning_rate": 1e-06, "loss": -0.044, "num_tokens": 354018638.0, "reward": 0.652901828289032, "reward_std": 0.16323533654212952, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631317377090454, "step": 592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3486.0, "completions/mean_length": 911.5011596679688, "completions/mean_terminated_length": 582.0701904296875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 5.541107871720117, "grad_norm": 0.14421240985393524, "learning_rate": 1e-06, "loss": -0.0531, "num_tokens": 354587599.0, "reward": 0.6696428656578064, "reward_std": 0.1471051275730133, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3826.0, "completions/mean_length": 1075.540283203125, "completions/mean_terminated_length": 657.2045288085938, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 5.550437317784256, "grad_norm": 0.16855870187282562, "learning_rate": 1e-06, "loss": -0.083, "num_tokens": 355189395.0, "reward": 0.6049107313156128, "reward_std": 0.20005299150943756, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914289474487305, "step": 594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3282.0, "completions/mean_length": 1002.5625610351562, "completions/mean_terminated_length": 639.9900512695312, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 5.559766763848397, "grad_norm": 0.17673636972904205, "learning_rate": 1e-06, "loss": -0.0474, "num_tokens": 355809707.0, "reward": 0.5602678656578064, "reward_std": 0.18821631371974945, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2931.0, "completions/mean_length": 989.51123046875, "completions/mean_terminated_length": 625.4089965820312, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 5.569096209912536, "grad_norm": 0.1684071272611618, "learning_rate": 1e-06, "loss": -0.0339, "num_tokens": 356387085.0, "reward": 0.6238839626312256, "reward_std": 0.1985434591770172, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.4846802353858948, "step": 596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2428.0, "completions/mean_length": 852.7142944335938, "completions/mean_terminated_length": 603.2307739257812, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 5.578425655976677, "grad_norm": 0.14479760825634003, "learning_rate": 1e-06, "loss": -0.0301, "num_tokens": 356966557.0, "reward": 0.6473214626312256, "reward_std": 0.1407930701971054, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807061672210693, "step": 597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2759.0, "completions/mean_length": 927.8638916015625, "completions/mean_terminated_length": 621.5202026367188, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 5.587755102040816, "grad_norm": 0.15837636590003967, "learning_rate": 1e-06, "loss": -0.0301, "num_tokens": 357558851.0, "reward": 0.598214328289032, "reward_std": 0.17728371918201447, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053290486335754, "step": 598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3263.0, "completions/mean_length": 997.7422485351562, "completions/mean_terminated_length": 608.5137939453125, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 5.597084548104956, "grad_norm": 0.1677587926387787, "learning_rate": 1e-06, "loss": -0.0465, "num_tokens": 358134820.0, "reward": 0.5870535969734192, "reward_std": 0.18231727182865143, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2665.0, "completions/mean_length": 1024.2623291015625, "completions/mean_terminated_length": 625.2849731445312, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 5.606413994169096, "grad_norm": 0.15459483861923218, "learning_rate": 1e-06, "loss": -0.0294, "num_tokens": 358717983.0, "reward": 0.6049107313156128, "reward_std": 0.16773755848407745, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 600 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2661.0, "completions/mean_length": 812.6585083007812, "completions/mean_terminated_length": 630.8952026367188, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 5.615743440233236, "grad_norm": 0.1608906388282776, "learning_rate": 1e-06, "loss": -0.0203, "num_tokens": 359342053.0, "reward": 0.6551339626312256, "reward_std": 0.19813409447669983, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 601 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3789.0, "completions/mean_length": 817.8248291015625, "completions/mean_terminated_length": 595.112060546875, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 5.625072886297376, "grad_norm": 0.13706611096858978, "learning_rate": 1e-06, "loss": -0.0247, "num_tokens": 359920952.0, "reward": 0.6752232313156128, "reward_std": 0.16265869140625, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3890.0, "completions/mean_length": 894.86279296875, "completions/mean_terminated_length": 615.1517333984375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 5.634402332361516, "grad_norm": 0.16253483295440674, "learning_rate": 1e-06, "loss": -0.0182, "num_tokens": 360522613.0, "reward": 0.6830357313156128, "reward_std": 0.18742607533931732, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 603 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2333.0, "completions/mean_length": 820.6361694335938, "completions/mean_terminated_length": 627.0567626953125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 5.643731778425656, "grad_norm": 0.14758019149303436, "learning_rate": 1e-06, "loss": -0.0292, "num_tokens": 361139543.0, "reward": 0.6551339626312256, "reward_std": 0.1923847645521164, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 604 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3689.0, "completions/mean_length": 901.40185546875, "completions/mean_terminated_length": 609.56884765625, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 5.653061224489796, "grad_norm": 0.13753700256347656, "learning_rate": 1e-06, "loss": -0.0462, "num_tokens": 361729487.0, "reward": 0.6506696939468384, "reward_std": 0.17171913385391235, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 605 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3492.0, "completions/mean_length": 891.83154296875, "completions/mean_terminated_length": 599.124267578125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 5.662390670553936, "grad_norm": 0.17821872234344482, "learning_rate": 1e-06, "loss": -0.032, "num_tokens": 362308552.0, "reward": 0.660714328289032, "reward_std": 0.21707123517990112, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 606 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2894.0, "completions/mean_length": 888.2053833007812, "completions/mean_terminated_length": 586.6178588867188, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 5.671720116618076, "grad_norm": 0.14055386185646057, "learning_rate": 1e-06, "loss": -0.039, "num_tokens": 362875784.0, "reward": 0.6930803656578064, "reward_std": 0.14669284224510193, "rewards/verify_math_reward/mean": 0.6930803656578064, "rewards/verify_math_reward/std": 0.46147334575653076, "step": 607 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 765.9252319335938, "completions/mean_terminated_length": 577.430419921875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 5.681049562682215, "grad_norm": 0.11932545900344849, "learning_rate": 1e-06, "loss": -0.0173, "num_tokens": 363459509.0, "reward": 0.6897321939468384, "reward_std": 0.11712367087602615, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.4628615975379944, "step": 608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3767.0, "completions/mean_length": 926.708740234375, "completions/mean_terminated_length": 628.7411499023438, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 5.690379008746356, "grad_norm": 0.15615519881248474, "learning_rate": 1e-06, "loss": -0.0428, "num_tokens": 364063832.0, "reward": 0.5915178656578064, "reward_std": 0.18648220598697662, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 609 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3095.0, "completions/mean_length": 774.9642944335938, "completions/mean_terminated_length": 561.9762573242188, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 5.699708454810495, "grad_norm": 0.1408005803823471, "learning_rate": 1e-06, "loss": -0.0312, "num_tokens": 364628504.0, "reward": 0.6819196939468384, "reward_std": 0.149286150932312, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 610 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3978.0, "completions/mean_length": 851.0123291015625, "completions/mean_terminated_length": 613.9533081054688, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 5.709037900874636, "grad_norm": 0.13900849223136902, "learning_rate": 1e-06, "loss": -0.0322, "num_tokens": 365236555.0, "reward": 0.6361607313156128, "reward_std": 0.16037030518054962, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3284.0, "completions/mean_length": 1006.2589721679688, "completions/mean_terminated_length": 613.7257690429688, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 5.718367346938775, "grad_norm": 0.16850095987319946, "learning_rate": 1e-06, "loss": -0.0764, "num_tokens": 365824115.0, "reward": 0.6506696939468384, "reward_std": 0.18874019384384155, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 612 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3931.0, "completions/mean_length": 891.1272583007812, "completions/mean_terminated_length": 585.5281372070312, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 5.727696793002916, "grad_norm": 0.13965724408626556, "learning_rate": 1e-06, "loss": -0.0406, "num_tokens": 366392733.0, "reward": 0.6428571939468384, "reward_std": 0.1321905255317688, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.47942501306533813, "step": 613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2521.0, "completions/mean_length": 823.0614013671875, "completions/mean_terminated_length": 592.3524169921875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 5.737026239067055, "grad_norm": 0.14950597286224365, "learning_rate": 1e-06, "loss": -0.0333, "num_tokens": 366982268.0, "reward": 0.6484375, "reward_std": 0.16465751826763153, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 614 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2915.0, "completions/mean_length": 821.654052734375, "completions/mean_terminated_length": 595.0286865234375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 5.746355685131196, "grad_norm": 0.14539076387882233, "learning_rate": 1e-06, "loss": -0.0274, "num_tokens": 367569398.0, "reward": 0.6082589626312256, "reward_std": 0.17461413145065308, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.4884119927883148, "step": 615 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3374.0, "completions/mean_length": 858.5859985351562, "completions/mean_terminated_length": 588.4752197265625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 5.755685131195335, "grad_norm": 0.13074429333209991, "learning_rate": 1e-06, "loss": -0.0385, "num_tokens": 368148899.0, "reward": 0.6707589626312256, "reward_std": 0.1418115496635437, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 616 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2440.0, "completions/mean_length": 815.708740234375, "completions/mean_terminated_length": 588.671875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 5.765014577259475, "grad_norm": 0.16770143806934357, "learning_rate": 1e-06, "loss": -0.0411, "num_tokens": 368728542.0, "reward": 0.6573660969734192, "reward_std": 0.1802103966474533, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 617 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2782.0, "completions/mean_length": 987.958740234375, "completions/mean_terminated_length": 649.4591674804688, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 5.774344023323615, "grad_norm": 0.13639651238918304, "learning_rate": 1e-06, "loss": -0.0538, "num_tokens": 369337593.0, "reward": 0.6674107313156128, "reward_std": 0.1692018061876297, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 618 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4059.0, "completions/mean_length": 927.35498046875, "completions/mean_terminated_length": 595.2527465820312, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 5.783673469387755, "grad_norm": 0.13305982947349548, "learning_rate": 1e-06, "loss": -0.0333, "num_tokens": 369909847.0, "reward": 0.640625, "reward_std": 0.12426057457923889, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 619 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3511.0, "completions/mean_length": 833.2511596679688, "completions/mean_terminated_length": 561.026611328125, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 5.793002915451895, "grad_norm": 0.14414291083812714, "learning_rate": 1e-06, "loss": -0.0495, "num_tokens": 370470696.0, "reward": 0.6584821939468384, "reward_std": 0.15962466597557068, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 620 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2733.0, "completions/mean_length": 896.3136596679688, "completions/mean_terminated_length": 608.2639770507812, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 5.802332361516035, "grad_norm": 0.1310935616493225, "learning_rate": 1e-06, "loss": -0.0325, "num_tokens": 371057769.0, "reward": 0.6350446939468384, "reward_std": 0.14684367179870605, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 621 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3498.0, "completions/mean_length": 882.8281860351562, "completions/mean_terminated_length": 627.3228759765625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 5.811661807580175, "grad_norm": 0.13719427585601807, "learning_rate": 1e-06, "loss": -0.0353, "num_tokens": 371662655.0, "reward": 0.668526828289032, "reward_std": 0.16405907273292542, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 622 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3710.0, "completions/mean_length": 1014.232177734375, "completions/mean_terminated_length": 670.1141357421875, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 5.820991253644315, "grad_norm": 0.14407393336296082, "learning_rate": 1e-06, "loss": -0.037, "num_tokens": 372303167.0, "reward": 0.6227678656578064, "reward_std": 0.1522217094898224, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644899368286, "step": 623 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3407.0, "completions/mean_length": 933.95654296875, "completions/mean_terminated_length": 645.0974731445312, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 5.830320699708455, "grad_norm": 0.14550761878490448, "learning_rate": 1e-06, "loss": -0.0548, "num_tokens": 372934568.0, "reward": 0.609375, "reward_std": 0.16999204456806183, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3052.0, "completions/mean_length": 844.0904541015625, "completions/mean_terminated_length": 585.5048217773438, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 5.839650145772595, "grad_norm": 0.14165332913398743, "learning_rate": 1e-06, "loss": -0.0429, "num_tokens": 373500169.0, "reward": 0.7087053656578064, "reward_std": 0.1527032107114792, "rewards/verify_math_reward/mean": 0.7087053656578064, "rewards/verify_math_reward/std": 0.45461273193359375, "step": 625 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3868.0, "completions/mean_length": 763.7120971679688, "completions/mean_terminated_length": 595.7303466796875, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 5.848979591836734, "grad_norm": 0.12370767444372177, "learning_rate": 1e-06, "loss": -0.0117, "num_tokens": 374088303.0, "reward": 0.6640625, "reward_std": 0.11896559596061707, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 626 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3027.0, "completions/mean_length": 820.0826416015625, "completions/mean_terminated_length": 576.5491943359375, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 5.858309037900875, "grad_norm": 0.17080122232437134, "learning_rate": 1e-06, "loss": -0.0177, "num_tokens": 374664105.0, "reward": 0.6741071939468384, "reward_std": 0.1446651816368103, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 627 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3773.0, "completions/mean_length": 922.950927734375, "completions/mean_terminated_length": 603.30712890625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 5.867638483965014, "grad_norm": 0.16178250312805176, "learning_rate": 1e-06, "loss": -0.0351, "num_tokens": 375235501.0, "reward": 0.6506696939468384, "reward_std": 0.16724829375743866, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 628 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 899.0167846679688, "completions/mean_terminated_length": 619.668701171875, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 5.876967930029155, "grad_norm": 0.14244891703128815, "learning_rate": 1e-06, "loss": -0.0533, "num_tokens": 375844228.0, "reward": 0.629464328289032, "reward_std": 0.1816418617963791, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3564.0, "completions/mean_length": 906.47998046875, "completions/mean_terminated_length": 602.3447875976562, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 5.886297376093294, "grad_norm": 0.14157941937446594, "learning_rate": 1e-06, "loss": -0.0483, "num_tokens": 376428978.0, "reward": 0.6584821939468384, "reward_std": 0.165178582072258, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 630 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2815.0, "completions/mean_length": 818.3705444335938, "completions/mean_terminated_length": 587.3309326171875, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 5.895626822157435, "grad_norm": 0.14939922094345093, "learning_rate": 1e-06, "loss": -0.0346, "num_tokens": 377008262.0, "reward": 0.637276828289032, "reward_std": 0.15132339298725128, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 631 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3528.0, "completions/mean_length": 937.2980346679688, "completions/mean_terminated_length": 652.9379272460938, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 5.904956268221574, "grad_norm": 0.14546416699886322, "learning_rate": 1e-06, "loss": -0.0222, "num_tokens": 377641929.0, "reward": 0.6037946939468384, "reward_std": 0.17341090738773346, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 632 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2029.0, "completions/mean_length": 757.5145263671875, "completions/mean_terminated_length": 572.698486328125, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 5.914285714285715, "grad_norm": 0.14108310639858246, "learning_rate": 1e-06, "loss": -0.0389, "num_tokens": 378209158.0, "reward": 0.6674107313156128, "reward_std": 0.13973930478096008, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2733.0, "completions/mean_length": 801.0960083007812, "completions/mean_terminated_length": 581.4357299804688, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 5.923615160349854, "grad_norm": 0.17102012038230896, "learning_rate": 1e-06, "loss": -0.0148, "num_tokens": 378789804.0, "reward": 0.6640625, "reward_std": 0.18152238428592682, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 634 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3288.0, "completions/mean_length": 983.6239013671875, "completions/mean_terminated_length": 618.8316650390625, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 5.932944606413994, "grad_norm": 0.16276925802230835, "learning_rate": 1e-06, "loss": -0.0524, "num_tokens": 379383507.0, "reward": 0.6305803656578064, "reward_std": 0.19148434698581696, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 635 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2827.0, "completions/mean_length": 908.6529541015625, "completions/mean_terminated_length": 613.240234375, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 5.942274052478134, "grad_norm": 0.14920274913311005, "learning_rate": 1e-06, "loss": -0.0109, "num_tokens": 379973484.0, "reward": 0.6417410969734192, "reward_std": 0.14635765552520752, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975653409957886, "step": 636 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3118.0, "completions/mean_length": 908.0469360351562, "completions/mean_terminated_length": 616.8209838867188, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 5.9516034985422746, "grad_norm": 0.1606222540140152, "learning_rate": 1e-06, "loss": -0.0182, "num_tokens": 380575622.0, "reward": 0.598214328289032, "reward_std": 0.16540497541427612, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053290486335754, "step": 637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3884.0, "completions/mean_length": 978.2891235351562, "completions/mean_terminated_length": 664.2199096679688, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 5.960932944606414, "grad_norm": 0.13233332335948944, "learning_rate": 1e-06, "loss": -0.0298, "num_tokens": 381212033.0, "reward": 0.6183035969734192, "reward_std": 0.15060026943683624, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 638 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3942.0, "completions/mean_length": 805.2656860351562, "completions/mean_terminated_length": 577.5059814453125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 5.970262390670554, "grad_norm": 0.141335129737854, "learning_rate": 1e-06, "loss": -0.035, "num_tokens": 381787199.0, "reward": 0.6339285969734192, "reward_std": 0.16029614210128784, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 639 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2849.0, "completions/mean_length": 804.2098388671875, "completions/mean_terminated_length": 533.8695678710938, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 5.979591836734694, "grad_norm": 0.14169426262378693, "learning_rate": 1e-06, "loss": -0.0223, "num_tokens": 382309483.0, "reward": 0.65625, "reward_std": 0.12125539779663086, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3972.0, "completions/mean_length": 1099.232177734375, "completions/mean_terminated_length": 617.885986328125, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 5.988921282798834, "grad_norm": 0.16632375121116638, "learning_rate": 1e-06, "loss": -0.0578, "num_tokens": 382886459.0, "reward": 0.5714285969734192, "reward_std": 0.18400652706623077, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514806270599365, "step": 641 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05965909090909094, "completions/max_length": 4096.0, "completions/max_terminated_length": 2588.0, "completions/mean_length": 834.4375, "completions/mean_terminated_length": 627.5105590820312, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 5.998250728862974, "grad_norm": 0.14725947380065918, "learning_rate": 1e-06, "loss": -0.0428, "num_tokens": 383457042.0, "reward": 0.6629464626312256, "reward_std": 0.14534805715084076, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 642 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3828.0, "completions/mean_length": 875.0067138671875, "completions/mean_terminated_length": 618.8795166015625, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 6.0093294460641395, "grad_norm": 0.15770813822746277, "learning_rate": 1e-06, "loss": -0.0384, "num_tokens": 384075656.0, "reward": 0.6104910969734192, "reward_std": 0.16390934586524963, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791128396987915, "step": 643 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2629.0, "completions/mean_length": 771.4564819335938, "completions/mean_terminated_length": 549.8202514648438, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 6.01865889212828, "grad_norm": 0.16204263269901276, "learning_rate": 1e-06, "loss": -0.0345, "num_tokens": 384615121.0, "reward": 0.6774553656578064, "reward_std": 0.17325752973556519, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 644 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3206.0, "completions/mean_length": 888.7801513671875, "completions/mean_terminated_length": 604.3001708984375, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 6.0279883381924195, "grad_norm": 0.18138130009174347, "learning_rate": 1e-06, "loss": -0.0451, "num_tokens": 385203988.0, "reward": 0.6852678656578064, "reward_std": 0.20534615218639374, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 645 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3978.0, "completions/mean_length": 933.7120971679688, "completions/mean_terminated_length": 627.9338989257812, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 6.03731778425656, "grad_norm": 0.15825672447681427, "learning_rate": 1e-06, "loss": -0.0275, "num_tokens": 385817842.0, "reward": 0.5725446939468384, "reward_std": 0.15420952439308167, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 646 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3903.0, "completions/mean_length": 952.71435546875, "completions/mean_terminated_length": 698.673095703125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 6.0466472303206995, "grad_norm": 0.139588862657547, "learning_rate": 1e-06, "loss": -0.0544, "num_tokens": 386481378.0, "reward": 0.6540178656578064, "reward_std": 0.1636785864830017, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 647 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3614.0, "completions/mean_length": 919.12841796875, "completions/mean_terminated_length": 586.1640014648438, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 6.05597667638484, "grad_norm": 0.14342287182807922, "learning_rate": 1e-06, "loss": -0.0257, "num_tokens": 387044293.0, "reward": 0.6696428656578064, "reward_std": 0.15349416434764862, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 648 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3892.0, "completions/mean_length": 959.8035888671875, "completions/mean_terminated_length": 626.82470703125, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 6.0653061224489795, "grad_norm": 0.14265646040439606, "learning_rate": 1e-06, "loss": -0.0264, "num_tokens": 387639741.0, "reward": 0.6462053656578064, "reward_std": 0.14004167914390564, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 649 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3879.0, "completions/mean_length": 851.2813110351562, "completions/mean_terminated_length": 576.3051147460938, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 6.07463556851312, "grad_norm": 0.14192552864551544, "learning_rate": 1e-06, "loss": -0.0435, "num_tokens": 388207105.0, "reward": 0.6573660969734192, "reward_std": 0.1554897278547287, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485536336898804, "step": 650 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2246.0, "completions/mean_length": 759.1049194335938, "completions/mean_terminated_length": 557.7064819335938, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 6.0839650145772595, "grad_norm": 0.15565773844718933, "learning_rate": 1e-06, "loss": -0.0365, "num_tokens": 388769359.0, "reward": 0.6651785969734192, "reward_std": 0.15338537096977234, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219157218933105, "step": 651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2537.0, "completions/mean_length": 843.5045166015625, "completions/mean_terminated_length": 572.1354370117188, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 6.093294460641399, "grad_norm": 0.14776359498500824, "learning_rate": 1e-06, "loss": -0.0192, "num_tokens": 389330691.0, "reward": 0.6495535969734192, "reward_std": 0.1338823139667511, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 652 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3314.0, "completions/mean_length": 912.7701416015625, "completions/mean_terminated_length": 600.688720703125, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 6.1026239067055394, "grad_norm": 0.15512509644031525, "learning_rate": 1e-06, "loss": -0.0472, "num_tokens": 389911645.0, "reward": 0.6272321939468384, "reward_std": 0.15631134808063507, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 653 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3418.0, "completions/mean_length": 948.036865234375, "completions/mean_terminated_length": 630.920166015625, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 6.111953352769679, "grad_norm": 0.14921864867210388, "learning_rate": 1e-06, "loss": -0.062, "num_tokens": 390522430.0, "reward": 0.6272321939468384, "reward_std": 0.18329153954982758, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 654 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3554.0, "completions/mean_length": 862.6663208007812, "completions/mean_terminated_length": 562.991455078125, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 6.121282798833819, "grad_norm": 0.3301600515842438, "learning_rate": 1e-06, "loss": -0.0307, "num_tokens": 391066227.0, "reward": 0.6439732313156128, "reward_std": 0.15555500984191895, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 655 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2953.0, "completions/mean_length": 813.7756958007812, "completions/mean_terminated_length": 544.2210083007812, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 6.130612244897959, "grad_norm": 0.15756727755069733, "learning_rate": 1e-06, "loss": -0.0258, "num_tokens": 391594074.0, "reward": 0.723214328289032, "reward_std": 0.1493610143661499, "rewards/verify_math_reward/mean": 0.7232142686843872, "rewards/verify_math_reward/std": 0.44765952229499817, "step": 656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 845.3281860351562, "completions/mean_terminated_length": 620.34130859375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 6.139941690962099, "grad_norm": 0.1489861011505127, "learning_rate": 1e-06, "loss": -0.0558, "num_tokens": 392198920.0, "reward": 0.684151828289032, "reward_std": 0.16773684322834015, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 657 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4003.0, "completions/mean_length": 957.08154296875, "completions/mean_terminated_length": 584.8002319335938, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 6.149271137026239, "grad_norm": 0.16252826154232025, "learning_rate": 1e-06, "loss": -0.047, "num_tokens": 392761153.0, "reward": 0.629464328289032, "reward_std": 0.14079166948795319, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 658 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2455.0, "completions/mean_length": 796.4174194335938, "completions/mean_terminated_length": 576.4452514648438, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 6.158600583090379, "grad_norm": 0.13597793877124786, "learning_rate": 1e-06, "loss": -0.0395, "num_tokens": 393330439.0, "reward": 0.7243303656578064, "reward_std": 0.1406836062669754, "rewards/verify_math_reward/mean": 0.7243303656578064, "rewards/verify_math_reward/std": 0.4471006691455841, "step": 659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3844.0, "completions/mean_length": 888.8013916015625, "completions/mean_terminated_length": 591.5487670898438, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 6.167930029154519, "grad_norm": 0.15661506354808807, "learning_rate": 1e-06, "loss": -0.0279, "num_tokens": 393902053.0, "reward": 0.6417410969734192, "reward_std": 0.1735963523387909, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975656390190125, "step": 660 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3138.0, "completions/mean_length": 1075.360595703125, "completions/mean_terminated_length": 665.7173461914062, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 6.1772594752186585, "grad_norm": 0.1538289487361908, "learning_rate": 1e-06, "loss": -0.0465, "num_tokens": 394529584.0, "reward": 0.5524553656578064, "reward_std": 0.17043782770633698, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 661 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3255.0, "completions/mean_length": 769.1730346679688, "completions/mean_terminated_length": 593.2537841796875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 6.186588921282799, "grad_norm": 0.13282759487628937, "learning_rate": 1e-06, "loss": -0.0284, "num_tokens": 395117371.0, "reward": 0.6986607313156128, "reward_std": 0.14158585667610168, "rewards/verify_math_reward/mean": 0.6986607313156128, "rewards/verify_math_reward/std": 0.4590960443019867, "step": 662 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3696.0, "completions/mean_length": 878.6585083007812, "completions/mean_terminated_length": 584.7479248046875, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 6.1959183673469385, "grad_norm": 0.16318635642528534, "learning_rate": 1e-06, "loss": -0.044, "num_tokens": 395690865.0, "reward": 0.6908482313156128, "reward_std": 0.17367054522037506, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 663 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2016.0, "completions/mean_length": 810.0279541015625, "completions/mean_terminated_length": 582.597900390625, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 6.205247813411079, "grad_norm": 0.1334102898836136, "learning_rate": 1e-06, "loss": -0.0216, "num_tokens": 396260210.0, "reward": 0.6205357313156128, "reward_std": 0.130649596452713, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 664 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3396.0, "completions/mean_length": 929.9386596679688, "completions/mean_terminated_length": 653.29248046875, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 6.214577259475218, "grad_norm": 0.15028053522109985, "learning_rate": 1e-06, "loss": -0.0303, "num_tokens": 396897403.0, "reward": 0.551339328289032, "reward_std": 0.1418864130973816, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3659.0, "completions/mean_length": 831.505615234375, "completions/mean_terminated_length": 533.2874755859375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 6.223906705539359, "grad_norm": 0.16576990485191345, "learning_rate": 1e-06, "loss": -0.0515, "num_tokens": 397413608.0, "reward": 0.6863839626312256, "reward_std": 0.15488353371620178, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422144770622253, "step": 666 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3694.0, "completions/mean_length": 964.036865234375, "completions/mean_terminated_length": 648.5319213867188, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 6.233236151603498, "grad_norm": 0.14228703081607819, "learning_rate": 1e-06, "loss": -0.0499, "num_tokens": 398023633.0, "reward": 0.640625, "reward_std": 0.16134853661060333, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 667 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3856.0, "completions/mean_length": 1026.0201416015625, "completions/mean_terminated_length": 631.6397705078125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 6.242565597667639, "grad_norm": 0.15054574608802795, "learning_rate": 1e-06, "loss": -0.0327, "num_tokens": 398620939.0, "reward": 0.6127232313156128, "reward_std": 0.1465405970811844, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 668 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3837.0, "completions/mean_length": 977.7511596679688, "completions/mean_terminated_length": 633.8550415039062, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 6.251895043731778, "grad_norm": 0.1700843870639801, "learning_rate": 1e-06, "loss": -0.0235, "num_tokens": 399223812.0, "reward": 0.5859375, "reward_std": 0.1597301959991455, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 669 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3357.0, "completions/mean_length": 883.9832763671875, "completions/mean_terminated_length": 573.3966064453125, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 6.261224489795918, "grad_norm": 0.15604212880134583, "learning_rate": 1e-06, "loss": -0.0112, "num_tokens": 399782509.0, "reward": 0.6752232313156128, "reward_std": 0.11960498988628387, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 670 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3368.0, "completions/mean_length": 888.8516235351562, "completions/mean_terminated_length": 587.3248291015625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 6.270553935860058, "grad_norm": 0.15224426984786987, "learning_rate": 1e-06, "loss": -0.0577, "num_tokens": 400352568.0, "reward": 0.6729910969734192, "reward_std": 0.14992554485797882, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 671 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2380.0, "completions/mean_length": 969.661865234375, "completions/mean_terminated_length": 624.8735961914062, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 6.279883381924198, "grad_norm": 0.14326012134552002, "learning_rate": 1e-06, "loss": -0.051, "num_tokens": 400951145.0, "reward": 0.6651785969734192, "reward_std": 0.1510196030139923, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219157218933105, "step": 672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3373.0, "completions/mean_length": 981.7578735351562, "completions/mean_terminated_length": 608.0487060546875, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 6.289212827988338, "grad_norm": 0.1547728329896927, "learning_rate": 1e-06, "loss": -0.0398, "num_tokens": 401537312.0, "reward": 0.625, "reward_std": 0.16330133378505707, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 673 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2486.0, "completions/mean_length": 867.833740234375, "completions/mean_terminated_length": 594.2603149414062, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 6.298542274052478, "grad_norm": 0.1469501405954361, "learning_rate": 1e-06, "loss": -0.0268, "num_tokens": 402115011.0, "reward": 0.6629464626312256, "reward_std": 0.16927708685398102, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 674 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2878.0, "completions/mean_length": 939.9074096679688, "completions/mean_terminated_length": 613.4150390625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 6.307871720116618, "grad_norm": 0.15620948374271393, "learning_rate": 1e-06, "loss": -0.0613, "num_tokens": 402688848.0, "reward": 0.660714328289032, "reward_std": 0.1834438145160675, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 675 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3498.0, "completions/mean_length": 946.30029296875, "completions/mean_terminated_length": 629.0086059570312, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 6.317201166180758, "grad_norm": 0.14087562263011932, "learning_rate": 1e-06, "loss": -0.0244, "num_tokens": 403287965.0, "reward": 0.6439732313156128, "reward_std": 0.1571815013885498, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 676 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3362.0, "completions/mean_length": 950.5714721679688, "completions/mean_terminated_length": 590.646728515625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 6.326530612244898, "grad_norm": 0.14991827309131622, "learning_rate": 1e-06, "loss": -0.055, "num_tokens": 403854205.0, "reward": 0.6272321939468384, "reward_std": 0.16101223230361938, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 677 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2886.0, "completions/mean_length": 942.6875610351562, "completions/mean_terminated_length": 590.5806274414062, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 6.335860058309038, "grad_norm": 0.15213103592395782, "learning_rate": 1e-06, "loss": -0.0426, "num_tokens": 404433261.0, "reward": 0.6328125, "reward_std": 0.14977329969406128, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 678 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2945.0, "completions/mean_length": 893.1641235351562, "completions/mean_terminated_length": 587.7592163085938, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 6.345189504373177, "grad_norm": 0.15677736699581146, "learning_rate": 1e-06, "loss": -0.0666, "num_tokens": 405002544.0, "reward": 0.6830357313156128, "reward_std": 0.17269553244113922, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 679 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3432.0, "completions/mean_length": 929.7857666015625, "completions/mean_terminated_length": 644.7493896484375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 6.354518950437318, "grad_norm": 0.14856281876564026, "learning_rate": 1e-06, "loss": -0.0339, "num_tokens": 405618392.0, "reward": 0.6718750596046448, "reward_std": 0.16991788148880005, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 680 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3868.0, "completions/mean_length": 1069.368408203125, "completions/mean_terminated_length": 663.2633056640625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 6.363848396501457, "grad_norm": 0.153412863612175, "learning_rate": 1e-06, "loss": -0.0667, "num_tokens": 406233170.0, "reward": 0.6082589626312256, "reward_std": 0.16529551148414612, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.4884119927883148, "step": 681 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3957.0, "completions/mean_length": 1008.0469360351562, "completions/mean_terminated_length": 615.7408447265625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 6.373177842565598, "grad_norm": 0.163439080119133, "learning_rate": 1e-06, "loss": -0.0518, "num_tokens": 406818028.0, "reward": 0.613839328289032, "reward_std": 0.17754334211349487, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 682 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2578.0, "completions/mean_length": 835.3660888671875, "completions/mean_terminated_length": 576.0867309570312, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 6.382507288629737, "grad_norm": 0.14261586964130402, "learning_rate": 1e-06, "loss": -0.04, "num_tokens": 407388780.0, "reward": 0.715401828289032, "reward_std": 0.13482409715652466, "rewards/verify_math_reward/mean": 0.7154017686843872, "rewards/verify_math_reward/std": 0.4514748752117157, "step": 683 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2400.0, "completions/mean_length": 962.1138916015625, "completions/mean_terminated_length": 607.8484497070312, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 6.391836734693878, "grad_norm": 0.14774686098098755, "learning_rate": 1e-06, "loss": -0.0399, "num_tokens": 407968418.0, "reward": 0.5926339626312256, "reward_std": 0.16604506969451904, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161848425865173, "step": 684 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2411.0, "completions/mean_length": 948.3147583007812, "completions/mean_terminated_length": 539.472900390625, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 6.401166180758017, "grad_norm": 0.1840965747833252, "learning_rate": 1e-06, "loss": -0.055, "num_tokens": 408487868.0, "reward": 0.7053571939468384, "reward_std": 0.18036557734012604, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613664388656616, "step": 685 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3128.0, "completions/mean_length": 1002.8817138671875, "completions/mean_terminated_length": 565.5108642578125, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 6.410495626822158, "grad_norm": 0.15381786227226257, "learning_rate": 1e-06, "loss": -0.0622, "num_tokens": 409019298.0, "reward": 0.6462053656578064, "reward_std": 0.14263640344142914, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 686 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2662.0, "completions/mean_length": 957.87060546875, "completions/mean_terminated_length": 628.9666748046875, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 6.419825072886297, "grad_norm": 0.15057718753814697, "learning_rate": 1e-06, "loss": -0.0517, "num_tokens": 409613822.0, "reward": 0.6395089626312256, "reward_std": 0.16856171190738678, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 687 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2931.0, "completions/mean_length": 1012.1160888671875, "completions/mean_terminated_length": 637.7271728515625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 6.429154518950437, "grad_norm": 0.15426890552043915, "learning_rate": 1e-06, "loss": -0.0488, "num_tokens": 410210918.0, "reward": 0.6238839626312256, "reward_std": 0.16337618231773376, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.4846802353858948, "step": 688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3988.0, "completions/mean_length": 976.1038208007812, "completions/mean_terminated_length": 636.3131103515625, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 6.438483965014577, "grad_norm": 0.13959239423274994, "learning_rate": 1e-06, "loss": -0.037, "num_tokens": 410815923.0, "reward": 0.5993303656578064, "reward_std": 0.16412687301635742, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3614.0, "completions/mean_length": 940.3504638671875, "completions/mean_terminated_length": 613.9039306640625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 6.447813411078717, "grad_norm": 0.15441806614398956, "learning_rate": 1e-06, "loss": -0.0496, "num_tokens": 411399245.0, "reward": 0.6595982313156128, "reward_std": 0.18085232377052307, "rewards/verify_math_reward/mean": 0.6595982313156128, "rewards/verify_math_reward/std": 0.4741089344024658, "step": 690 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3517.0, "completions/mean_length": 1002.0123291015625, "completions/mean_terminated_length": 635.0599365234375, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 6.457142857142857, "grad_norm": 0.14723995327949524, "learning_rate": 1e-06, "loss": -0.0437, "num_tokens": 411996400.0, "reward": 0.6350446939468384, "reward_std": 0.1742357611656189, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 691 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3049.0, "completions/mean_length": 844.5234985351562, "completions/mean_terminated_length": 598.6134033203125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 6.466472303206997, "grad_norm": 0.14656583964824677, "learning_rate": 1e-06, "loss": -0.0125, "num_tokens": 412585333.0, "reward": 0.676339328289032, "reward_std": 0.1426345854997635, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3937.0, "completions/mean_length": 934.34716796875, "completions/mean_terminated_length": 576.94287109375, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 6.475801749271137, "grad_norm": 0.1560421884059906, "learning_rate": 1e-06, "loss": -0.0638, "num_tokens": 413132164.0, "reward": 0.6830357313156128, "reward_std": 0.1557832509279251, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3249.0, "completions/mean_length": 845.8192138671875, "completions/mean_terminated_length": 570.3801879882812, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 6.485131195335277, "grad_norm": 0.14496955275535583, "learning_rate": 1e-06, "loss": -0.0152, "num_tokens": 413694866.0, "reward": 0.6662946939468384, "reward_std": 0.13842660188674927, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 694 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3939.0, "completions/mean_length": 880.7600708007812, "completions/mean_terminated_length": 587.0414428710938, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 6.494460641399417, "grad_norm": 0.15243299305438995, "learning_rate": 1e-06, "loss": -0.0283, "num_tokens": 414264187.0, "reward": 0.6517857313156128, "reward_std": 0.15800592303276062, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 695 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3640.0, "completions/mean_length": 981.44873046875, "completions/mean_terminated_length": 603.336669921875, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 6.503790087463557, "grad_norm": 0.14356254041194916, "learning_rate": 1e-06, "loss": -0.0238, "num_tokens": 414839845.0, "reward": 0.6183035969734192, "reward_std": 0.13845908641815186, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 696 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3930.0, "completions/mean_length": 1060.068115234375, "completions/mean_terminated_length": 626.363525390625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 6.513119533527696, "grad_norm": 0.1592957228422165, "learning_rate": 1e-06, "loss": -0.0907, "num_tokens": 415427586.0, "reward": 0.6462053656578064, "reward_std": 0.1828383058309555, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 697 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3997.0, "completions/mean_length": 972.755615234375, "completions/mean_terminated_length": 611.0348510742188, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 6.522448979591837, "grad_norm": 0.13217735290527344, "learning_rate": 1e-06, "loss": -0.0333, "num_tokens": 416017999.0, "reward": 0.6573660969734192, "reward_std": 0.1420711725950241, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 698 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3059.0, "completions/mean_length": 800.0535888671875, "completions/mean_terminated_length": 555.0311889648438, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 6.531778425655976, "grad_norm": 0.14009703695774078, "learning_rate": 1e-06, "loss": -0.0235, "num_tokens": 416570623.0, "reward": 0.7020089626312256, "reward_std": 0.12625475227832794, "rewards/verify_math_reward/mean": 0.7020089030265808, "rewards/verify_math_reward/std": 0.45763099193573, "step": 699 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2204.0, "completions/mean_length": 870.8850708007812, "completions/mean_terminated_length": 550.3521728515625, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 6.541107871720117, "grad_norm": 0.1271519809961319, "learning_rate": 1e-06, "loss": -0.0095, "num_tokens": 417099344.0, "reward": 0.6741071939468384, "reward_std": 0.10058976709842682, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 700 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3150.0, "completions/mean_length": 974.5078735351562, "completions/mean_terminated_length": 625.9541015625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 6.550437317784256, "grad_norm": 0.14003746211528778, "learning_rate": 1e-06, "loss": -0.0479, "num_tokens": 417688999.0, "reward": 0.6428571939468384, "reward_std": 0.14102695882320404, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.47942501306533813, "step": 701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4076.0, "completions/mean_length": 865.8560791015625, "completions/mean_terminated_length": 604.794921875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 6.559766763848397, "grad_norm": 0.15287365019321442, "learning_rate": 1e-06, "loss": -0.0032, "num_tokens": 418274678.0, "reward": 0.6584821939468384, "reward_std": 0.13602055609226227, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 702 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3689.0, "completions/mean_length": 1132.7578125, "completions/mean_terminated_length": 665.6834716796875, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 6.569096209912536, "grad_norm": 0.14975379407405853, "learning_rate": 1e-06, "loss": -0.0888, "num_tokens": 418866261.0, "reward": 0.5970982313156128, "reward_std": 0.15274415910243988, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 703 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3912.0, "completions/mean_length": 856.1395263671875, "completions/mean_terminated_length": 581.5750732421875, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 6.578425655976677, "grad_norm": 0.1418183445930481, "learning_rate": 1e-06, "loss": -0.0261, "num_tokens": 419432194.0, "reward": 0.6830357313156128, "reward_std": 0.14699524641036987, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3742.0, "completions/mean_length": 979.10498046875, "completions/mean_terminated_length": 631.0645141601562, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 6.587755102040816, "grad_norm": 0.1584358662366867, "learning_rate": 1e-06, "loss": -0.0529, "num_tokens": 420021992.0, "reward": 0.6953125596046448, "reward_std": 0.17622952163219452, "rewards/verify_math_reward/mean": 0.6953125, "rewards/verify_math_reward/std": 0.4605320394039154, "step": 705 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4037.0, "completions/mean_length": 988.0614013671875, "completions/mean_terminated_length": 610.752197265625, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 6.597084548104956, "grad_norm": 0.15502335131168365, "learning_rate": 1e-06, "loss": -0.0331, "num_tokens": 420603039.0, "reward": 0.6395089626312256, "reward_std": 0.15613025426864624, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111123085022, "step": 706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3301.0, "completions/mean_length": 858.583740234375, "completions/mean_terminated_length": 575.7026977539062, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 6.606413994169096, "grad_norm": 0.14917011559009552, "learning_rate": 1e-06, "loss": -0.0362, "num_tokens": 421168810.0, "reward": 0.6495535969734192, "reward_std": 0.13624556362628937, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.47737622261047363, "step": 707 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 882.0402221679688, "completions/mean_terminated_length": 584.1609497070312, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 6.615743440233236, "grad_norm": 0.15988053381443024, "learning_rate": 1e-06, "loss": -0.0371, "num_tokens": 421737950.0, "reward": 0.6819196939468384, "reward_std": 0.17844446003437042, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3947.0, "completions/mean_length": 1074.094970703125, "completions/mean_terminated_length": 606.7899169921875, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 6.625072886297376, "grad_norm": 0.16448982059955597, "learning_rate": 1e-06, "loss": -0.0746, "num_tokens": 422298115.0, "reward": 0.645089328289032, "reward_std": 0.1699153631925583, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 709 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3173.0, "completions/mean_length": 1038.110595703125, "completions/mean_terminated_length": 583.3474731445312, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 6.634402332361516, "grad_norm": 0.1447528451681137, "learning_rate": 1e-06, "loss": -0.0829, "num_tokens": 422852918.0, "reward": 0.6696428656578064, "reward_std": 0.16698938608169556, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 710 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3998.0, "completions/mean_length": 899.0881958007812, "completions/mean_terminated_length": 632.3567504882812, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 6.643731778425656, "grad_norm": 0.12234325706958771, "learning_rate": 1e-06, "loss": -0.0315, "num_tokens": 423459733.0, "reward": 0.6517857313156128, "reward_std": 0.12125399708747864, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 711 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2934.0, "completions/mean_length": 1070.560302734375, "completions/mean_terminated_length": 575.48828125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 6.653061224489796, "grad_norm": 0.15985053777694702, "learning_rate": 1e-06, "loss": -0.0578, "num_tokens": 423999227.0, "reward": 0.6037946939468384, "reward_std": 0.16266122460365295, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 712 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3614.0, "completions/mean_length": 907.1574096679688, "completions/mean_terminated_length": 577.277099609375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 6.662390670553936, "grad_norm": 0.14064276218414307, "learning_rate": 1e-06, "loss": -0.0373, "num_tokens": 424558992.0, "reward": 0.6674107313156128, "reward_std": 0.11727311462163925, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 713 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3741.0, "completions/mean_length": 990.9085083007812, "completions/mean_terminated_length": 605.20703125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 6.671720116618076, "grad_norm": 0.13120871782302856, "learning_rate": 1e-06, "loss": -0.0305, "num_tokens": 425140454.0, "reward": 0.6071428656578064, "reward_std": 0.12302273511886597, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2889.0, "completions/mean_length": 1005.5324096679688, "completions/mean_terminated_length": 595.2933349609375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 6.681049562682215, "grad_norm": 0.14877988398075104, "learning_rate": 1e-06, "loss": -0.0609, "num_tokens": 425702067.0, "reward": 0.6551339626312256, "reward_std": 0.1471467763185501, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900800228119, "step": 715 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3806.0, "completions/mean_length": 1020.2545166015625, "completions/mean_terminated_length": 642.5313110351562, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 6.690379008746356, "grad_norm": 0.1527351588010788, "learning_rate": 1e-06, "loss": -0.0566, "num_tokens": 426305559.0, "reward": 0.6305803656578064, "reward_std": 0.17446216940879822, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.48291724920272827, "step": 716 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3579.0, "completions/mean_length": 1064.0592041015625, "completions/mean_terminated_length": 626.4993286132812, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 6.699708454810495, "grad_norm": 0.12764711678028107, "learning_rate": 1e-06, "loss": -0.0524, "num_tokens": 426885116.0, "reward": 0.609375, "reward_std": 0.12692692875862122, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3393.0, "completions/mean_length": 908.0770263671875, "completions/mean_terminated_length": 608.3577880859375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 6.709037900874636, "grad_norm": 0.15886114537715912, "learning_rate": 1e-06, "loss": -0.0312, "num_tokens": 427467681.0, "reward": 0.6662946939468384, "reward_std": 0.1766424924135208, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 718 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2535.0, "completions/mean_length": 946.1339721679688, "completions/mean_terminated_length": 590.0620727539062, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 6.718367346938775, "grad_norm": 0.17554457485675812, "learning_rate": 1e-06, "loss": -0.0394, "num_tokens": 428041561.0, "reward": 0.6551339626312256, "reward_std": 0.17559263110160828, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 719 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3210.0, "completions/mean_length": 967.8438110351562, "completions/mean_terminated_length": 618.5458984375, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 6.727696793002916, "grad_norm": 0.1546453833580017, "learning_rate": 1e-06, "loss": -0.0399, "num_tokens": 428629029.0, "reward": 0.6149553656578064, "reward_std": 0.1616523265838623, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 720 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3660.0, "completions/mean_length": 926.0402221679688, "completions/mean_terminated_length": 623.7702026367188, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 6.737026239067055, "grad_norm": 0.14390479028224945, "learning_rate": 1e-06, "loss": -0.0167, "num_tokens": 429220193.0, "reward": 0.6573660969734192, "reward_std": 0.1571369171142578, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 721 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 869.489990234375, "completions/mean_terminated_length": 540.0922241210938, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 6.746355685131196, "grad_norm": 0.15528041124343872, "learning_rate": 1e-06, "loss": -0.048, "num_tokens": 429762064.0, "reward": 0.6640625, "reward_std": 0.15924306213855743, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 722 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2770.0, "completions/mean_length": 997.4542846679688, "completions/mean_terminated_length": 581.7000122070312, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 6.755685131195335, "grad_norm": 0.18460367619991302, "learning_rate": 1e-06, "loss": -0.0649, "num_tokens": 430309543.0, "reward": 0.6361607313156128, "reward_std": 0.1305394172668457, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 723 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3328.0, "completions/mean_length": 829.0089721679688, "completions/mean_terminated_length": 577.7019653320312, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 6.765014577259475, "grad_norm": 0.12850549817085266, "learning_rate": 1e-06, "loss": -0.0466, "num_tokens": 430871223.0, "reward": 0.707589328289032, "reward_std": 0.14263710379600525, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 724 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3886.0, "completions/mean_length": 1013.01123046875, "completions/mean_terminated_length": 643.052490234375, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 6.774344023323615, "grad_norm": 0.14496265351772308, "learning_rate": 1e-06, "loss": -0.0438, "num_tokens": 431471313.0, "reward": 0.609375, "reward_std": 0.16386516392230988, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 725 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3083.0, "completions/mean_length": 942.8683471679688, "completions/mean_terminated_length": 612.39208984375, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 6.783673469387755, "grad_norm": 0.16144829988479614, "learning_rate": 1e-06, "loss": -0.0452, "num_tokens": 432055411.0, "reward": 0.6729910969734192, "reward_std": 0.1521814614534378, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2320.0, "completions/mean_length": 1024.888427734375, "completions/mean_terminated_length": 630.3626708984375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 6.793002915451895, "grad_norm": 0.16446854174137115, "learning_rate": 1e-06, "loss": -0.0607, "num_tokens": 432646319.0, "reward": 0.5993303656578064, "reward_std": 0.18010301887989044, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 727 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3900.0, "completions/mean_length": 1074.12841796875, "completions/mean_terminated_length": 629.1664428710938, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 6.802332361516035, "grad_norm": 0.1925697773694992, "learning_rate": 1e-06, "loss": -0.0472, "num_tokens": 433221706.0, "reward": 0.5870535969734192, "reward_std": 0.18829300999641418, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3842.0, "completions/mean_length": 990.4285888671875, "completions/mean_terminated_length": 617.760009765625, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 6.811661807580175, "grad_norm": 0.1481935977935791, "learning_rate": 1e-06, "loss": -0.0512, "num_tokens": 433797290.0, "reward": 0.6808035969734192, "reward_std": 0.16597020626068115, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.4664256274700165, "step": 729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3786.0, "completions/mean_length": 1064.4654541015625, "completions/mean_terminated_length": 622.5281372070312, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 6.820991253644315, "grad_norm": 0.1566762775182724, "learning_rate": 1e-06, "loss": -0.0441, "num_tokens": 434360643.0, "reward": 0.606026828289032, "reward_std": 0.17442122101783752, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890191316604614, "step": 730 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4006.0, "completions/mean_length": 1022.6785888671875, "completions/mean_terminated_length": 623.49560546875, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 6.830320699708455, "grad_norm": 0.1799861043691635, "learning_rate": 1e-06, "loss": -0.0309, "num_tokens": 434943875.0, "reward": 0.629464328289032, "reward_std": 0.16037283837795258, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 731 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3300.0, "completions/mean_length": 995.41748046875, "completions/mean_terminated_length": 640.6243896484375, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 6.839650145772595, "grad_norm": 0.15543977916240692, "learning_rate": 1e-06, "loss": -0.0294, "num_tokens": 435549521.0, "reward": 0.6350446939468384, "reward_std": 0.16244256496429443, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 732 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3254.0, "completions/mean_length": 913.9263916015625, "completions/mean_terminated_length": 601.9583740234375, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 6.848979591836734, "grad_norm": 0.14535638689994812, "learning_rate": 1e-06, "loss": -0.057, "num_tokens": 436141807.0, "reward": 0.6417410969734192, "reward_std": 0.1573990434408188, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975656390190125, "step": 733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2443.0, "completions/mean_length": 1001.9732666015625, "completions/mean_terminated_length": 582.377685546875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 6.858309037900875, "grad_norm": 0.1419903188943863, "learning_rate": 1e-06, "loss": -0.0394, "num_tokens": 436690015.0, "reward": 0.6361607313156128, "reward_std": 0.14409995079040527, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 734 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1484375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3257.0, "completions/mean_length": 1119.2410888671875, "completions/mean_terminated_length": 600.3565063476562, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 6.867638483965014, "grad_norm": 0.15156620740890503, "learning_rate": 1e-06, "loss": -0.0817, "num_tokens": 437247871.0, "reward": 0.629464328289032, "reward_std": 0.15469737350940704, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2847.0, "completions/mean_length": 913.8527221679688, "completions/mean_terminated_length": 614.6764526367188, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 6.876967930029155, "grad_norm": 0.148374542593956, "learning_rate": 1e-06, "loss": -0.0378, "num_tokens": 437851427.0, "reward": 0.6908482313156128, "reward_std": 0.14635653793811798, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3818.0, "completions/mean_length": 819.4017944335938, "completions/mean_terminated_length": 596.7962036132812, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 6.886297376093294, "grad_norm": 0.14753498136997223, "learning_rate": 1e-06, "loss": -0.0375, "num_tokens": 438451155.0, "reward": 0.6651785969734192, "reward_std": 0.1374947875738144, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219160199165344, "step": 737 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3703.0, "completions/mean_length": 1006.2266235351562, "completions/mean_terminated_length": 648.38232421875, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 6.895626822157435, "grad_norm": 0.14850552380084991, "learning_rate": 1e-06, "loss": -0.0263, "num_tokens": 439065878.0, "reward": 0.6037946939468384, "reward_std": 0.1515752673149109, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 738 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3984.0, "completions/mean_length": 942.4163208007812, "completions/mean_terminated_length": 585.9241943359375, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 6.904956268221574, "grad_norm": 0.15253846347332, "learning_rate": 1e-06, "loss": -0.028, "num_tokens": 439618731.0, "reward": 0.6651785969734192, "reward_std": 0.14977288246154785, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219157218933105, "step": 739 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3940.0, "completions/mean_length": 1075.78466796875, "completions/mean_terminated_length": 648.7222900390625, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 6.914285714285715, "grad_norm": 0.13778938353061676, "learning_rate": 1e-06, "loss": -0.0507, "num_tokens": 440222378.0, "reward": 0.5948660969734192, "reward_std": 0.1310618817806244, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 740 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3878.0, "completions/mean_length": 904.69091796875, "completions/mean_terminated_length": 583.2075805664062, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 6.923615160349854, "grad_norm": 0.16547341644763947, "learning_rate": 1e-06, "loss": -0.0479, "num_tokens": 440777597.0, "reward": 0.6808035969734192, "reward_std": 0.1654791235923767, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.4664256274700165, "step": 741 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3951.0, "completions/mean_length": 931.1998291015625, "completions/mean_terminated_length": 608.10205078125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 6.932944606413994, "grad_norm": 0.1453477442264557, "learning_rate": 1e-06, "loss": -0.0428, "num_tokens": 441356488.0, "reward": 0.6517857313156128, "reward_std": 0.14203867316246033, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 742 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2650.0, "completions/mean_length": 905.6473388671875, "completions/mean_terminated_length": 579.94091796875, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 6.942274052478134, "grad_norm": 0.13660424947738647, "learning_rate": 1e-06, "loss": -0.0392, "num_tokens": 441922948.0, "reward": 0.640625, "reward_std": 0.14992626011371613, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 743 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3565.0, "completions/mean_length": 1148.8125, "completions/mean_terminated_length": 666.5454711914062, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 6.9516034985422746, "grad_norm": 0.15369150042533875, "learning_rate": 1e-06, "loss": -0.0847, "num_tokens": 442522492.0, "reward": 0.5881696939468384, "reward_std": 0.17758752405643463, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 744 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3203.0, "completions/mean_length": 980.9855346679688, "completions/mean_terminated_length": 620.2178955078125, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 6.960932944606414, "grad_norm": 0.13597574830055237, "learning_rate": 1e-06, "loss": -0.0526, "num_tokens": 443111087.0, "reward": 0.6674107313156128, "reward_std": 0.13699373602867126, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140389680862427, "step": 745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2414.0, "completions/mean_length": 947.6395263671875, "completions/mean_terminated_length": 604.748779296875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 6.970262390670554, "grad_norm": 0.15619629621505737, "learning_rate": 1e-06, "loss": -0.055, "num_tokens": 443690644.0, "reward": 0.6517857313156128, "reward_std": 0.14707191288471222, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 746 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2988.0, "completions/mean_length": 975.1797485351562, "completions/mean_terminated_length": 587.5244750976562, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 6.979591836734694, "grad_norm": 0.17877110838890076, "learning_rate": 1e-06, "loss": -0.0382, "num_tokens": 444245413.0, "reward": 0.637276828289032, "reward_std": 0.15500116348266602, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 747 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3919.0, "completions/mean_length": 935.318115234375, "completions/mean_terminated_length": 599.7395629882812, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 6.988921282798834, "grad_norm": 0.16470494866371155, "learning_rate": 1e-06, "loss": -0.0359, "num_tokens": 444811578.0, "reward": 0.6886160969734192, "reward_std": 0.15000112354755402, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 748 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13068181818181823, "completions/max_length": 4096.0, "completions/max_terminated_length": 2026.0, "completions/mean_length": 1023.7614135742188, "completions/mean_terminated_length": 561.9215698242188, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 6.998250728862974, "grad_norm": 0.1541009545326233, "learning_rate": 1e-06, "loss": -0.0582, "num_tokens": 445353419.0, "reward": 0.606026828289032, "reward_std": 0.1420711725950241, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 749 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3964.0, "completions/mean_length": 1002.380615234375, "completions/mean_terminated_length": 596.147705078125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 7.0093294460641395, "grad_norm": 0.17025308310985565, "learning_rate": 1e-06, "loss": -0.0418, "num_tokens": 445921456.0, "reward": 0.6462053656578064, "reward_std": 0.16296431422233582, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 750 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3364.0, "completions/mean_length": 930.5301513671875, "completions/mean_terminated_length": 568.3121948242188, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 7.01865889212828, "grad_norm": 0.15315227210521698, "learning_rate": 1e-06, "loss": -0.073, "num_tokens": 446469563.0, "reward": 0.6852678656578064, "reward_std": 0.16258524358272552, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3210.0, "completions/mean_length": 967.51904296875, "completions/mean_terminated_length": 605.1917724609375, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 7.0279883381924195, "grad_norm": 0.14817029237747192, "learning_rate": 1e-06, "loss": -0.0538, "num_tokens": 447039844.0, "reward": 0.6584821939468384, "reward_std": 0.1579635739326477, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3686.0, "completions/mean_length": 1061.4320068359375, "completions/mean_terminated_length": 632.3401489257812, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 7.03731778425656, "grad_norm": 0.12313847243785858, "learning_rate": 1e-06, "loss": -0.0621, "num_tokens": 447621887.0, "reward": 0.652901828289032, "reward_std": 0.124261274933815, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631317377090454, "step": 753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3793.0, "completions/mean_length": 982.216552734375, "completions/mean_terminated_length": 599.822021484375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 7.0466472303206995, "grad_norm": 0.16664567589759827, "learning_rate": 1e-06, "loss": -0.0284, "num_tokens": 448187449.0, "reward": 0.6674107313156128, "reward_std": 0.13154971599578857, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3134.0, "completions/mean_length": 973.4129638671875, "completions/mean_terminated_length": 633.3292236328125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 7.05597667638484, "grad_norm": 0.15151433646678925, "learning_rate": 1e-06, "loss": -0.0368, "num_tokens": 448799859.0, "reward": 0.6484375, "reward_std": 0.15251775085926056, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3613.0, "completions/mean_length": 969.2623291015625, "completions/mean_terminated_length": 637.2876586914062, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 7.0653061224489795, "grad_norm": 0.16572339832782745, "learning_rate": 1e-06, "loss": -0.0277, "num_tokens": 449405134.0, "reward": 0.6696428656578064, "reward_std": 0.1702873855829239, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 756 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3684.0, "completions/mean_length": 921.01904296875, "completions/mean_terminated_length": 562.1080932617188, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 7.07463556851312, "grad_norm": 0.1490185409784317, "learning_rate": 1e-06, "loss": -0.0276, "num_tokens": 449944871.0, "reward": 0.6640625, "reward_std": 0.12549659609794617, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3951.0, "completions/mean_length": 989.7902221679688, "completions/mean_terminated_length": 599.5628051757812, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 7.0839650145772595, "grad_norm": 0.17254501581192017, "learning_rate": 1e-06, "loss": -0.0489, "num_tokens": 450510115.0, "reward": 0.6830357313156128, "reward_std": 0.16882342100143433, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 758 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2861.0, "completions/mean_length": 924.8203735351562, "completions/mean_terminated_length": 592.4525146484375, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 7.093294460641399, "grad_norm": 0.15800072252750397, "learning_rate": 1e-06, "loss": -0.0347, "num_tokens": 451082794.0, "reward": 0.637276828289032, "reward_std": 0.15526078641414642, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 759 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3650.0, "completions/mean_length": 909.0703735351562, "completions/mean_terminated_length": 617.9379272460938, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 7.1026239067055394, "grad_norm": 0.14929740130901337, "learning_rate": 1e-06, "loss": -0.0263, "num_tokens": 451682041.0, "reward": 0.668526828289032, "reward_std": 0.15680059790611267, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 760 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3891.0, "completions/mean_length": 1049.28466796875, "completions/mean_terminated_length": 582.6705322265625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 7.111953352769679, "grad_norm": 0.13264919817447662, "learning_rate": 1e-06, "loss": -0.0653, "num_tokens": 452217504.0, "reward": 0.660714328289032, "reward_std": 0.11817465722560883, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 761 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3851.0, "completions/mean_length": 1064.227783203125, "completions/mean_terminated_length": 648.70556640625, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 7.121282798833819, "grad_norm": 0.13209392130374908, "learning_rate": 1e-06, "loss": -0.0529, "num_tokens": 452819028.0, "reward": 0.6116071939468384, "reward_std": 0.13383881747722626, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 762 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3683.0, "completions/mean_length": 948.7813110351562, "completions/mean_terminated_length": 631.7395629882812, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 7.130612244897959, "grad_norm": 0.16158021986484528, "learning_rate": 1e-06, "loss": -0.0274, "num_tokens": 453428016.0, "reward": 0.6205357313156128, "reward_std": 0.18678276240825653, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 763 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 890.9029541015625, "completions/mean_terminated_length": 619.2845458984375, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 7.139941690962099, "grad_norm": 0.15226466953754425, "learning_rate": 1e-06, "loss": -0.028, "num_tokens": 454019745.0, "reward": 0.6964285969734192, "reward_std": 0.1516169160604477, "rewards/verify_math_reward/mean": 0.6964285969734192, "rewards/verify_math_reward/std": 0.4600566029548645, "step": 764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3380.0, "completions/mean_length": 1041.1239013671875, "completions/mean_terminated_length": 635.609375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 7.149271137026239, "grad_norm": 0.13410533964633942, "learning_rate": 1e-06, "loss": -0.0143, "num_tokens": 454621984.0, "reward": 0.6473214626312256, "reward_std": 0.14710794389247894, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807061672210693, "step": 765 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3638.0, "completions/mean_length": 989.8973388671875, "completions/mean_terminated_length": 582.0252685546875, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 7.158600583090379, "grad_norm": 0.26457151770591736, "learning_rate": 1e-06, "loss": -0.0207, "num_tokens": 455165900.0, "reward": 0.6729910969734192, "reward_std": 0.10431172698736191, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 766 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2437.0, "completions/mean_length": 978.0636596679688, "completions/mean_terminated_length": 629.9069213867188, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 7.167930029154519, "grad_norm": 0.14636722207069397, "learning_rate": 1e-06, "loss": -0.0351, "num_tokens": 455766989.0, "reward": 0.6082589626312256, "reward_std": 0.15860366821289062, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.4884119927883148, "step": 767 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3490.0, "completions/mean_length": 975.2277221679688, "completions/mean_terminated_length": 583.1708374023438, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 7.1772594752186585, "grad_norm": 0.15124312043190002, "learning_rate": 1e-06, "loss": -0.0414, "num_tokens": 456317273.0, "reward": 0.676339328289032, "reward_std": 0.14913208782672882, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2608.0, "completions/mean_length": 921.6317138671875, "completions/mean_terminated_length": 618.9413452148438, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 7.186588921282799, "grad_norm": 0.15408408641815186, "learning_rate": 1e-06, "loss": -0.0599, "num_tokens": 456911607.0, "reward": 0.6718750596046448, "reward_std": 0.1886628121137619, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2836.0, "completions/mean_length": 1032.5279541015625, "completions/mean_terminated_length": 630.2537841796875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 7.1959183673469385, "grad_norm": 0.1400507092475891, "learning_rate": 1e-06, "loss": -0.0645, "num_tokens": 457503288.0, "reward": 0.6495535969734192, "reward_std": 0.13929423689842224, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 770 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3766.0, "completions/mean_length": 1033.84716796875, "completions/mean_terminated_length": 653.4793090820312, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 7.205247813411079, "grad_norm": 0.14792729914188385, "learning_rate": 1e-06, "loss": -0.0609, "num_tokens": 458109543.0, "reward": 0.6160714626312256, "reward_std": 0.13737602531909943, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 771 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4023.0, "completions/mean_length": 1061.501220703125, "completions/mean_terminated_length": 628.0012817382812, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 7.214577259475218, "grad_norm": 0.15368112921714783, "learning_rate": 1e-06, "loss": -0.0489, "num_tokens": 458695288.0, "reward": 0.6506696939468384, "reward_std": 0.1612711399793625, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2485.0, "completions/mean_length": 947.9676513671875, "completions/mean_terminated_length": 561.3671875, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 7.223906705539359, "grad_norm": 0.17014342546463013, "learning_rate": 1e-06, "loss": -0.0722, "num_tokens": 459238139.0, "reward": 0.6116071939468384, "reward_std": 0.15965421497821808, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.4876568913459778, "step": 773 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3735.0, "completions/mean_length": 879.8114013671875, "completions/mean_terminated_length": 594.535888671875, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 7.233236151603498, "grad_norm": 0.16187119483947754, "learning_rate": 1e-06, "loss": -0.0145, "num_tokens": 459819258.0, "reward": 0.6316964626312256, "reward_std": 0.16394074261188507, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 774 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3808.0, "completions/mean_length": 1040.4710693359375, "completions/mean_terminated_length": 673.8074951171875, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 7.242565597667639, "grad_norm": 0.14009040594100952, "learning_rate": 1e-06, "loss": -0.0546, "num_tokens": 460449592.0, "reward": 0.6707589626312256, "reward_std": 0.16818967461585999, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 775 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3036.0, "completions/mean_length": 983.7344360351562, "completions/mean_terminated_length": 636.2109375, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 7.251895043731778, "grad_norm": 0.15002386271953583, "learning_rate": 1e-06, "loss": -0.0631, "num_tokens": 461048362.0, "reward": 0.65625, "reward_std": 0.1685623973608017, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 776 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3524.0, "completions/mean_length": 930.8270263671875, "completions/mean_terminated_length": 599.0887451171875, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 7.261224489795918, "grad_norm": 0.1424115151166916, "learning_rate": 1e-06, "loss": -0.0463, "num_tokens": 461616487.0, "reward": 0.6863839626312256, "reward_std": 0.1451198309659958, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422144770622253, "step": 777 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3574.0, "completions/mean_length": 919.7489013671875, "completions/mean_terminated_length": 646.3988037109375, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 7.270553935860058, "grad_norm": 0.14443738758563995, "learning_rate": 1e-06, "loss": -0.0261, "num_tokens": 462235238.0, "reward": 0.6819196939468384, "reward_std": 0.15488353371620178, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 778 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3732.0, "completions/mean_length": 1221.9866943359375, "completions/mean_terminated_length": 685.2503662109375, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 7.279883381924198, "grad_norm": 0.18153712153434753, "learning_rate": 1e-06, "loss": -0.0503, "num_tokens": 462845138.0, "reward": 0.546875, "reward_std": 0.20715807378292084, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 779 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2835.0, "completions/mean_length": 972.5569458007812, "completions/mean_terminated_length": 623.7853393554688, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 7.289212827988338, "grad_norm": 0.14053218066692352, "learning_rate": 1e-06, "loss": -0.0342, "num_tokens": 463436133.0, "reward": 0.6361607313156128, "reward_std": 0.139630526304245, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 780 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 902.7254638671875, "completions/mean_terminated_length": 606.763427734375, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 7.298542274052478, "grad_norm": 0.14001287519931793, "learning_rate": 1e-06, "loss": -0.0687, "num_tokens": 464015983.0, "reward": 0.6886160969734192, "reward_std": 0.1677689254283905, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 781 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4016.0, "completions/mean_length": 1044.6451416015625, "completions/mean_terminated_length": 652.6574096679688, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 7.307871720116618, "grad_norm": 0.16297949850559235, "learning_rate": 1e-06, "loss": -0.0346, "num_tokens": 464627601.0, "reward": 0.6395089626312256, "reward_std": 0.17611047625541687, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 782 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2878.0, "completions/mean_length": 986.1417846679688, "completions/mean_terminated_length": 621.6446533203125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 7.317201166180758, "grad_norm": 0.14394626021385193, "learning_rate": 1e-06, "loss": -0.0294, "num_tokens": 465211504.0, "reward": 0.6573660969734192, "reward_std": 0.15000224113464355, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 783 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3153.0, "completions/mean_length": 921.0301513671875, "completions/mean_terminated_length": 596.8942260742188, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 7.326530612244898, "grad_norm": 0.15465307235717773, "learning_rate": 1e-06, "loss": -0.0799, "num_tokens": 465786723.0, "reward": 0.6975446939468384, "reward_std": 0.17600058019161224, "rewards/verify_math_reward/mean": 0.6975446343421936, "rewards/verify_math_reward/std": 0.45957788825035095, "step": 784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3377.0, "completions/mean_length": 900.732177734375, "completions/mean_terminated_length": 613.0802612304688, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 7.335860058309038, "grad_norm": 0.13416793942451477, "learning_rate": 1e-06, "loss": -0.0431, "num_tokens": 466372315.0, "reward": 0.6819196939468384, "reward_std": 0.1421799510717392, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 785 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3611.0, "completions/mean_length": 924.8359985351562, "completions/mean_terminated_length": 622.4511108398438, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 7.345189504373177, "grad_norm": 0.14363907277584076, "learning_rate": 1e-06, "loss": -0.0271, "num_tokens": 466971464.0, "reward": 0.6183035969734192, "reward_std": 0.14354610443115234, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 786 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3771.0, "completions/mean_length": 1063.4296875, "completions/mean_terminated_length": 634.620361328125, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 7.354518950437318, "grad_norm": 0.15622581541538239, "learning_rate": 1e-06, "loss": -0.0362, "num_tokens": 467549897.0, "reward": 0.652901828289032, "reward_std": 0.13921935856342316, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631317377090454, "step": 787 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 977.0938110351562, "completions/mean_terminated_length": 658.681396484375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 7.363848396501457, "grad_norm": 0.15423189103603363, "learning_rate": 1e-06, "loss": -0.0425, "num_tokens": 468180973.0, "reward": 0.6517857313156128, "reward_std": 0.15936140716075897, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3122.0, "completions/mean_length": 928.0714721679688, "completions/mean_terminated_length": 596.0443725585938, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 7.373177842565598, "grad_norm": 0.14016598463058472, "learning_rate": 1e-06, "loss": -0.0115, "num_tokens": 468750205.0, "reward": 0.7064732313156128, "reward_std": 0.1352359503507614, "rewards/verify_math_reward/mean": 0.7064732313156128, "rewards/verify_math_reward/std": 0.4556320011615753, "step": 789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2782.0, "completions/mean_length": 848.5178833007812, "completions/mean_terminated_length": 594.5030517578125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 7.382507288629737, "grad_norm": 0.14416192471981049, "learning_rate": 1e-06, "loss": -0.0284, "num_tokens": 469334237.0, "reward": 0.699776828289032, "reward_std": 0.15526191890239716, "rewards/verify_math_reward/mean": 0.6997767686843872, "rewards/verify_math_reward/std": 0.4586109220981598, "step": 790 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3404.0, "completions/mean_length": 886.6295166015625, "completions/mean_terminated_length": 584.893798828125, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 7.391836734693878, "grad_norm": 0.15903300046920776, "learning_rate": 1e-06, "loss": -0.0207, "num_tokens": 469903841.0, "reward": 0.7031250596046448, "reward_std": 0.13200506567955017, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 791 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3602.0, "completions/mean_length": 870.7902221679688, "completions/mean_terminated_length": 576.1608276367188, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 7.401166180758017, "grad_norm": 0.15618284046649933, "learning_rate": 1e-06, "loss": -0.0152, "num_tokens": 470474541.0, "reward": 0.6573660969734192, "reward_std": 0.14060944318771362, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2756.0, "completions/mean_length": 954.94873046875, "completions/mean_terminated_length": 586.7955322265625, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 7.410495626822158, "grad_norm": 0.14724324643611908, "learning_rate": 1e-06, "loss": -0.059, "num_tokens": 471034199.0, "reward": 0.6964285969734192, "reward_std": 0.12918534874916077, "rewards/verify_math_reward/mean": 0.6964285969734192, "rewards/verify_math_reward/std": 0.4600566029548645, "step": 793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4016.0, "completions/mean_length": 923.33935546875, "completions/mean_terminated_length": 629.2877807617188, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 7.419825072886297, "grad_norm": 0.13723978400230408, "learning_rate": 1e-06, "loss": -0.0633, "num_tokens": 471636063.0, "reward": 0.691964328289032, "reward_std": 0.15713873505592346, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 794 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2838.0, "completions/mean_length": 902.7031860351562, "completions/mean_terminated_length": 576.6961669921875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 7.429154518950437, "grad_norm": 0.13166898488998413, "learning_rate": 1e-06, "loss": -0.051, "num_tokens": 472185189.0, "reward": 0.7008928656578064, "reward_std": 0.1248999685049057, "rewards/verify_math_reward/mean": 0.7008928656578064, "rewards/verify_math_reward/std": 0.458122581243515, "step": 795 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3947.0, "completions/mean_length": 1197.5625, "completions/mean_terminated_length": 705.6605834960938, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 7.438483965014577, "grad_norm": 0.14812512695789337, "learning_rate": 1e-06, "loss": -0.0419, "num_tokens": 472825341.0, "reward": 0.5725446939468384, "reward_std": 0.16634789109230042, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3671.0, "completions/mean_length": 1129.048095703125, "completions/mean_terminated_length": 674.6499633789062, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 7.447813411078717, "grad_norm": 0.17269019782543182, "learning_rate": 1e-06, "loss": -0.0854, "num_tokens": 473436480.0, "reward": 0.6082589626312256, "reward_std": 0.19043196737766266, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841196298599243, "step": 797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3639.0, "completions/mean_length": 1082.1038818359375, "completions/mean_terminated_length": 660.3117065429688, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 7.457142857142857, "grad_norm": 0.16014207899570465, "learning_rate": 1e-06, "loss": -0.0604, "num_tokens": 474051653.0, "reward": 0.6506696939468384, "reward_std": 0.16878922283649445, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 798 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3215.0, "completions/mean_length": 1040.265625, "completions/mean_terminated_length": 656.37939453125, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 7.466472303206997, "grad_norm": 0.16901756823062897, "learning_rate": 1e-06, "loss": -0.0898, "num_tokens": 474665787.0, "reward": 0.660714328289032, "reward_std": 0.162215456366539, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3149.0, "completions/mean_length": 981.9163208007812, "completions/mean_terminated_length": 655.5326538085938, "completions/min_length": 207.0, "completions/min_terminated_length": 207.0, "epoch": 7.475801749271137, "grad_norm": 0.12940995395183563, "learning_rate": 1e-06, "loss": -0.0486, "num_tokens": 475288656.0, "reward": 0.6886160969734192, "reward_std": 0.13203758001327515, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2638.0, "completions/mean_length": 1063.0067138671875, "completions/mean_terminated_length": 629.721923828125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 7.485131195335277, "grad_norm": 0.15580123662948608, "learning_rate": 1e-06, "loss": -0.0486, "num_tokens": 475865158.0, "reward": 0.6205357313156128, "reward_std": 0.16495990753173828, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 801 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1741071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3152.0, "completions/mean_length": 1256.993408203125, "completions/mean_terminated_length": 658.5, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 7.494460641399417, "grad_norm": 0.15995453298091888, "learning_rate": 1e-06, "loss": -0.1085, "num_tokens": 476437328.0, "reward": 0.621651828289032, "reward_std": 0.1811874955892563, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2366.0, "completions/mean_length": 1078.6451416015625, "completions/mean_terminated_length": 616.5276489257812, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 7.503790087463557, "grad_norm": 0.15283119678497314, "learning_rate": 1e-06, "loss": -0.0598, "num_tokens": 477011042.0, "reward": 0.6752232313156128, "reward_std": 0.13583439588546753, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 803 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 1082.6473388671875, "completions/mean_terminated_length": 621.1428833007812, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 7.513119533527696, "grad_norm": 0.16292116045951843, "learning_rate": 1e-06, "loss": -0.087, "num_tokens": 477585510.0, "reward": 0.6551339626312256, "reward_std": 0.1931736022233963, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900800228119, "step": 804 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2621.0, "completions/mean_length": 1074.352783203125, "completions/mean_terminated_length": 624.9794921875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 7.522448979591837, "grad_norm": 0.1713329255580902, "learning_rate": 1e-06, "loss": -0.0794, "num_tokens": 478158146.0, "reward": 0.6517857313156128, "reward_std": 0.1782582849264145, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 805 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4011.0, "completions/mean_length": 1094.685302734375, "completions/mean_terminated_length": 643.91015625, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 7.531778425655976, "grad_norm": 0.1417725533246994, "learning_rate": 1e-06, "loss": -0.0742, "num_tokens": 478739040.0, "reward": 0.6540178656578064, "reward_std": 0.14011907577514648, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.475953072309494, "step": 806 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3183.0, "completions/mean_length": 1092.0614013671875, "completions/mean_terminated_length": 654.1470336914062, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 7.541107871720117, "grad_norm": 0.15278641879558563, "learning_rate": 1e-06, "loss": -0.0658, "num_tokens": 479338223.0, "reward": 0.621651828289032, "reward_std": 0.12959763407707214, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3578.0, "completions/mean_length": 996.060302734375, "completions/mean_terminated_length": 712.8745727539062, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 7.550437317784256, "grad_norm": 0.11239632219076157, "learning_rate": 1e-06, "loss": -0.0411, "num_tokens": 480002885.0, "reward": 0.6171875, "reward_std": 0.13151581585407257, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3557.0, "completions/mean_length": 1045.708740234375, "completions/mean_terminated_length": 705.10546875, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 7.559766763848397, "grad_norm": 0.1540139764547348, "learning_rate": 1e-06, "loss": -0.0256, "num_tokens": 480659024.0, "reward": 0.6238839626312256, "reward_std": 0.18370524048805237, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.4846802353858948, "step": 809 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3606.0, "completions/mean_length": 1049.35498046875, "completions/mean_terminated_length": 618.555419921875, "completions/min_length": 192.0, "completions/min_terminated_length": 192.0, "epoch": 7.569096209912536, "grad_norm": 0.15035966038703918, "learning_rate": 1e-06, "loss": -0.0504, "num_tokens": 481246630.0, "reward": 0.6183035969734192, "reward_std": 0.14207187294960022, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 810 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2692.0, "completions/mean_length": 1057.97216796875, "completions/mean_terminated_length": 680.6010131835938, "completions/min_length": 184.0, "completions/min_terminated_length": 184.0, "epoch": 7.578425655976677, "grad_norm": 0.1580764353275299, "learning_rate": 1e-06, "loss": -0.0441, "num_tokens": 481874621.0, "reward": 0.640625, "reward_std": 0.16927708685398102, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 811 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4018.0, "completions/mean_length": 1048.33935546875, "completions/mean_terminated_length": 682.6199951171875, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 7.587755102040816, "grad_norm": 0.15379290282726288, "learning_rate": 1e-06, "loss": -0.0333, "num_tokens": 482513693.0, "reward": 0.6261160969734192, "reward_std": 0.1696154773235321, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 812 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3848.0, "completions/mean_length": 1058.646240234375, "completions/mean_terminated_length": 629.1605224609375, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 7.597084548104956, "grad_norm": 0.13407360017299652, "learning_rate": 1e-06, "loss": -0.0602, "num_tokens": 483095040.0, "reward": 0.6863839626312256, "reward_std": 0.1445111334323883, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422144770622253, "step": 813 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3925.0, "completions/mean_length": 896.4553833007812, "completions/mean_terminated_length": 569.810546875, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 7.606413994169096, "grad_norm": 0.2240392565727234, "learning_rate": 1e-06, "loss": -0.0296, "num_tokens": 483647472.0, "reward": 0.7444196939468384, "reward_std": 0.15161871910095215, "rewards/verify_math_reward/mean": 0.7444196343421936, "rewards/verify_math_reward/std": 0.43643057346343994, "step": 814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3023.0, "completions/mean_length": 1093.907470703125, "completions/mean_terminated_length": 625.1935424804688, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 7.615743440233236, "grad_norm": 0.1418483555316925, "learning_rate": 1e-06, "loss": -0.0578, "num_tokens": 484217533.0, "reward": 0.6383928656578064, "reward_std": 0.13403315842151642, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 815 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3518.0, "completions/mean_length": 1123.6507568359375, "completions/mean_terminated_length": 664.0089721679688, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 7.625072886297376, "grad_norm": 0.13304497301578522, "learning_rate": 1e-06, "loss": -0.0609, "num_tokens": 484818956.0, "reward": 0.6082589626312256, "reward_std": 0.16953739523887634, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841196298599243, "step": 816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2673.0, "completions/mean_length": 1026.4676513671875, "completions/mean_terminated_length": 614.6063232421875, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 7.634402332361516, "grad_norm": 0.13836099207401276, "learning_rate": 1e-06, "loss": -0.0356, "num_tokens": 485390127.0, "reward": 0.6417410969734192, "reward_std": 0.11907297372817993, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975656390190125, "step": 817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3681.0, "completions/mean_length": 1113.1295166015625, "completions/mean_terminated_length": 625.0233764648438, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 7.643731778425656, "grad_norm": 0.14426197111606598, "learning_rate": 1e-06, "loss": -0.0532, "num_tokens": 485962811.0, "reward": 0.6160714626312256, "reward_std": 0.13737604022026062, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3459.0, "completions/mean_length": 1091.7132568359375, "completions/mean_terminated_length": 688.6063232421875, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 7.653061224489796, "grad_norm": 0.1458161324262619, "learning_rate": 1e-06, "loss": -0.0759, "num_tokens": 486604378.0, "reward": 0.6495535969734192, "reward_std": 0.189790740609169, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 819 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3846.0, "completions/mean_length": 922.2756958007812, "completions/mean_terminated_length": 619.646728515625, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 7.662390670553936, "grad_norm": 0.13000454008579254, "learning_rate": 1e-06, "loss": -0.0476, "num_tokens": 487193817.0, "reward": 0.7053571939468384, "reward_std": 0.1187373623251915, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613667368888855, "step": 820 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4067.0, "completions/mean_length": 1062.982177734375, "completions/mean_terminated_length": 703.2609252929688, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 7.671720116618076, "grad_norm": 0.14187565445899963, "learning_rate": 1e-06, "loss": -0.0644, "num_tokens": 487836217.0, "reward": 0.637276828289032, "reward_std": 0.16116377711296082, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 821 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3502.0, "completions/mean_length": 1070.126220703125, "completions/mean_terminated_length": 655.4124145507812, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 7.681049562682215, "grad_norm": 0.1762189120054245, "learning_rate": 1e-06, "loss": -0.0506, "num_tokens": 488448978.0, "reward": 0.598214328289032, "reward_std": 0.18814216554164886, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2943.0, "completions/mean_length": 1025.888427734375, "completions/mean_terminated_length": 596.22900390625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 7.690379008746356, "grad_norm": 0.1632724404335022, "learning_rate": 1e-06, "loss": -0.0484, "num_tokens": 489003726.0, "reward": 0.6517857313156128, "reward_std": 0.15521803498268127, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 823 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3276.0, "completions/mean_length": 1104.294677734375, "completions/mean_terminated_length": 614.7428588867188, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 7.699708454810495, "grad_norm": 0.14527326822280884, "learning_rate": 1e-06, "loss": -0.0351, "num_tokens": 489564022.0, "reward": 0.6283482313156128, "reward_std": 0.12482258677482605, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4034.0, "completions/mean_length": 1017.67529296875, "completions/mean_terminated_length": 665.4290771484375, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 7.709037900874636, "grad_norm": 0.14603237807750702, "learning_rate": 1e-06, "loss": -0.0497, "num_tokens": 490194075.0, "reward": 0.6473214626312256, "reward_std": 0.1614982634782791, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 825 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3739.0, "completions/mean_length": 993.1105346679688, "completions/mean_terminated_length": 642.3490600585938, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 7.718367346938775, "grad_norm": 0.17904680967330933, "learning_rate": 1e-06, "loss": -0.0551, "num_tokens": 490805278.0, "reward": 0.6540178656578064, "reward_std": 0.14695174992084503, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3780.0, "completions/mean_length": 1037.20654296875, "completions/mean_terminated_length": 631.1719360351562, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 7.727696793002916, "grad_norm": 0.15991447865962982, "learning_rate": 1e-06, "loss": -0.0338, "num_tokens": 491400135.0, "reward": 0.6584821939468384, "reward_std": 0.17017750442028046, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3643.0, "completions/mean_length": 1124.3248291015625, "completions/mean_terminated_length": 682.3833618164062, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 7.737026239067055, "grad_norm": 0.12999098002910614, "learning_rate": 1e-06, "loss": -0.0559, "num_tokens": 492025138.0, "reward": 0.6015625, "reward_std": 0.11396666616201401, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 828 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3304.0, "completions/mean_length": 972.638427734375, "completions/mean_terminated_length": 575.8339233398438, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 7.746355685131196, "grad_norm": 0.14956091344356537, "learning_rate": 1e-06, "loss": -0.0403, "num_tokens": 492573070.0, "reward": 0.7031250596046448, "reward_std": 0.13771232962608337, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 829 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2985.0, "completions/mean_length": 975.2902221679688, "completions/mean_terminated_length": 635.410888671875, "completions/min_length": 192.0, "completions/min_terminated_length": 192.0, "epoch": 7.755685131195335, "grad_norm": 0.15239976346492767, "learning_rate": 1e-06, "loss": -0.0317, "num_tokens": 493179298.0, "reward": 0.6417410969734192, "reward_std": 0.14263640344142914, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975659370422363, "step": 830 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3021.0, "completions/mean_length": 1054.7545166015625, "completions/mean_terminated_length": 651.04931640625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 7.765014577259475, "grad_norm": 0.1242513582110405, "learning_rate": 1e-06, "loss": -0.0688, "num_tokens": 493781014.0, "reward": 0.6729910969734192, "reward_std": 0.13557226955890656, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 831 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2772.0, "completions/mean_length": 1100.0592041015625, "completions/mean_terminated_length": 672.0675659179688, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 7.774344023323615, "grad_norm": 0.14618077874183655, "learning_rate": 1e-06, "loss": -0.0528, "num_tokens": 494389459.0, "reward": 0.5926339626312256, "reward_std": 0.13508763909339905, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3181.0, "completions/mean_length": 951.7355346679688, "completions/mean_terminated_length": 613.6007080078125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 7.783673469387755, "grad_norm": 0.15234903991222382, "learning_rate": 1e-06, "loss": -0.0455, "num_tokens": 494974054.0, "reward": 0.6584821939468384, "reward_std": 0.15811581909656525, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 833 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3760.0, "completions/mean_length": 995.435302734375, "completions/mean_terminated_length": 605.9170532226562, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 7.793002915451895, "grad_norm": 0.13999764621257782, "learning_rate": 1e-06, "loss": -0.0525, "num_tokens": 495553460.0, "reward": 0.6819196939468384, "reward_std": 0.16266010701656342, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3498.0, "completions/mean_length": 1097.5067138671875, "completions/mean_terminated_length": 655.9871826171875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 7.802332361516035, "grad_norm": 0.1538587510585785, "learning_rate": 1e-06, "loss": -0.0211, "num_tokens": 496158426.0, "reward": 0.6316964626312256, "reward_std": 0.13955636322498322, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3301.0, "completions/mean_length": 1032.2489013671875, "completions/mean_terminated_length": 625.5562744140625, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 7.811661807580175, "grad_norm": 0.14663149416446686, "learning_rate": 1e-06, "loss": -0.062, "num_tokens": 496753913.0, "reward": 0.6428571939468384, "reward_std": 0.1385025829076767, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.47942501306533813, "step": 836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3735.0, "completions/mean_length": 1041.985595703125, "completions/mean_terminated_length": 645.3102416992188, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 7.820991253644315, "grad_norm": 0.13347220420837402, "learning_rate": 1e-06, "loss": -0.065, "num_tokens": 497351548.0, "reward": 0.6930803656578064, "reward_std": 0.14684508740901947, "rewards/verify_math_reward/mean": 0.6930803656578064, "rewards/verify_math_reward/std": 0.46147334575653076, "step": 837 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 1015.90185546875, "completions/mean_terminated_length": 620.2216186523438, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 7.830320699708455, "grad_norm": 0.14678119122982025, "learning_rate": 1e-06, "loss": -0.0327, "num_tokens": 497931148.0, "reward": 0.6618303656578064, "reward_std": 0.13516180217266083, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3982.0, "completions/mean_length": 986.7745971679688, "completions/mean_terminated_length": 596.1683349609375, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 7.839650145772595, "grad_norm": 0.16466213762760162, "learning_rate": 1e-06, "loss": -0.0646, "num_tokens": 498500978.0, "reward": 0.6495535969734192, "reward_std": 0.16848431527614594, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2018.0, "completions/mean_length": 944.2288208007812, "completions/mean_terminated_length": 618.1834716796875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 7.848979591836734, "grad_norm": 0.13148349523544312, "learning_rate": 1e-06, "loss": -0.047, "num_tokens": 499087823.0, "reward": 0.6395089626312256, "reward_std": 0.14383850991725922, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 840 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3220.0, "completions/mean_length": 858.7857666015625, "completions/mean_terminated_length": 571.6452026367188, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 7.858309037900875, "grad_norm": 0.14904271066188812, "learning_rate": 1e-06, "loss": -0.0629, "num_tokens": 499655687.0, "reward": 0.7053571939468384, "reward_std": 0.1356482356786728, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613664388656616, "step": 841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2659.0, "completions/mean_length": 964.7120971679688, "completions/mean_terminated_length": 657.7230834960938, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 7.867638483965014, "grad_norm": 0.12897950410842896, "learning_rate": 1e-06, "loss": -0.0339, "num_tokens": 500276957.0, "reward": 0.6417410969734192, "reward_std": 0.15409217774868011, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975659370422363, "step": 842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3099.0, "completions/mean_length": 925.7645263671875, "completions/mean_terminated_length": 576.1350708007812, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 7.876967930029155, "grad_norm": 0.14820946753025055, "learning_rate": 1e-06, "loss": -0.0341, "num_tokens": 500831066.0, "reward": 0.6696428656578064, "reward_std": 0.146052747964859, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 843 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3196.0, "completions/mean_length": 905.3660888671875, "completions/mean_terminated_length": 566.607421875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 7.886297376093294, "grad_norm": 0.14712035655975342, "learning_rate": 1e-06, "loss": -0.0541, "num_tokens": 501373170.0, "reward": 0.7098214626312256, "reward_std": 0.13688749074935913, "rewards/verify_math_reward/mean": 0.7098214030265808, "rewards/verify_math_reward/std": 0.454098105430603, "step": 844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2908.0, "completions/mean_length": 911.32373046875, "completions/mean_terminated_length": 637.2484741210938, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 7.895626822157435, "grad_norm": 0.1415589302778244, "learning_rate": 1e-06, "loss": -0.0431, "num_tokens": 501996252.0, "reward": 0.676339328289032, "reward_std": 0.1504133939743042, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3235.0, "completions/mean_length": 1041.485595703125, "completions/mean_terminated_length": 609.5732421875, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 7.904956268221574, "grad_norm": 0.1677154153585434, "learning_rate": 1e-06, "loss": -0.0498, "num_tokens": 502571431.0, "reward": 0.6473214626312256, "reward_std": 0.16217337548732758, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3534.0, "completions/mean_length": 983.2176513671875, "completions/mean_terminated_length": 644.2017211914062, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 7.914285714285715, "grad_norm": 0.14427697658538818, "learning_rate": 1e-06, "loss": -0.0399, "num_tokens": 503192778.0, "reward": 0.6908482313156128, "reward_std": 0.15485143661499023, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3536.0, "completions/mean_length": 1031.899658203125, "completions/mean_terminated_length": 611.9467163085938, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 7.923615160349854, "grad_norm": 0.15892471373081207, "learning_rate": 1e-06, "loss": -0.0866, "num_tokens": 503766520.0, "reward": 0.637276828289032, "reward_std": 0.1653289794921875, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3217.0, "completions/mean_length": 968.3147583007812, "completions/mean_terminated_length": 619.0694580078125, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 7.932944606413994, "grad_norm": 0.15579116344451904, "learning_rate": 1e-06, "loss": -0.0589, "num_tokens": 504356722.0, "reward": 0.6551339626312256, "reward_std": 0.14910070598125458, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900800228119, "step": 849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3780.0, "completions/mean_length": 1141.2857666015625, "completions/mean_terminated_length": 644.3389892578125, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 7.942274052478134, "grad_norm": 0.19498465955257416, "learning_rate": 1e-06, "loss": -0.0861, "num_tokens": 504945706.0, "reward": 0.625, "reward_std": 0.18036307394504547, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 850 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3677.0, "completions/mean_length": 994.5078735351562, "completions/mean_terminated_length": 622.3287353515625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 7.9516034985422746, "grad_norm": 0.15794239938259125, "learning_rate": 1e-06, "loss": -0.0805, "num_tokens": 505529721.0, "reward": 0.6540178656578064, "reward_std": 0.16292154788970947, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 851 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3753.0, "completions/mean_length": 1146.546875, "completions/mean_terminated_length": 686.0516357421875, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 7.960932944606414, "grad_norm": 0.14056451618671417, "learning_rate": 1e-06, "loss": -0.059, "num_tokens": 506150227.0, "reward": 0.6194196939468384, "reward_std": 0.16273680329322815, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2712.0, "completions/mean_length": 935.69873046875, "completions/mean_terminated_length": 642.7926635742188, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 7.970262390670554, "grad_norm": 0.15269824862480164, "learning_rate": 1e-06, "loss": -0.0557, "num_tokens": 506758189.0, "reward": 0.7165178656578064, "reward_std": 0.1716417372226715, "rewards/verify_math_reward/mean": 0.7165178656578064, "rewards/verify_math_reward/std": 0.4509401023387909, "step": 853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3916.0, "completions/mean_length": 1038.5748291015625, "completions/mean_terminated_length": 615.119384765625, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 7.979591836734694, "grad_norm": 0.1502271592617035, "learning_rate": 1e-06, "loss": -0.0767, "num_tokens": 507327736.0, "reward": 0.684151828289032, "reward_std": 0.14451251924037933, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 854 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3193.0, "completions/mean_length": 1061.78466796875, "completions/mean_terminated_length": 632.74267578125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 7.988921282798834, "grad_norm": 0.1559712439775467, "learning_rate": 1e-06, "loss": -0.0677, "num_tokens": 507925551.0, "reward": 0.6395089626312256, "reward_std": 0.1562378853559494, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11931818181818177, "completions/max_length": 4096.0, "completions/max_terminated_length": 3109.0, "completions/mean_length": 1164.28125, "completions/mean_terminated_length": 767.0806274414062, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 7.998250728862974, "grad_norm": 0.1355619877576828, "learning_rate": 1e-06, "loss": -0.0392, "num_tokens": 508519647.0, "reward": 0.6629464626312256, "reward_std": 0.1394055187702179, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 856 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3079.0, "completions/mean_length": 1112.3404541015625, "completions/mean_terminated_length": 681.7484130859375, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 8.00932944606414, "grad_norm": 0.1278090476989746, "learning_rate": 1e-06, "loss": -0.0624, "num_tokens": 509147872.0, "reward": 0.6328125, "reward_std": 0.14214785397052765, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3949.0, "completions/mean_length": 972.8370971679688, "completions/mean_terminated_length": 628.3990478515625, "completions/min_length": 212.0, "completions/min_terminated_length": 212.0, "epoch": 8.018658892128279, "grad_norm": 0.14049650728702545, "learning_rate": 1e-06, "loss": -0.0661, "num_tokens": 509750846.0, "reward": 0.6941964626312256, "reward_std": 0.14921018481254578, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.4610042870044708, "step": 858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4050.0, "completions/mean_length": 1124.654052734375, "completions/mean_terminated_length": 660.7406616210938, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 8.02798833819242, "grad_norm": 0.1408451795578003, "learning_rate": 1e-06, "loss": -0.0535, "num_tokens": 510347512.0, "reward": 0.6305803656578064, "reward_std": 0.14150846004486084, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3762.0, "completions/mean_length": 1014.8984985351562, "completions/mean_terminated_length": 679.3328857421875, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 8.03731778425656, "grad_norm": 0.14122828841209412, "learning_rate": 1e-06, "loss": -0.0615, "num_tokens": 510976981.0, "reward": 0.6618303656578064, "reward_std": 0.15724678337574005, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 860 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3877.0, "completions/mean_length": 976.6160888671875, "completions/mean_terminated_length": 623.9900512695312, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 8.0466472303207, "grad_norm": 0.1603347212076187, "learning_rate": 1e-06, "loss": -0.0626, "num_tokens": 511564341.0, "reward": 0.6729910969734192, "reward_std": 0.14984887838363647, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3578.0, "completions/mean_length": 1054.18310546875, "completions/mean_terminated_length": 619.6377563476562, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 8.055976676384839, "grad_norm": 0.1483280211687088, "learning_rate": 1e-06, "loss": -0.0766, "num_tokens": 512131569.0, "reward": 0.6785714626312256, "reward_std": 0.15158231556415558, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3436.0, "completions/mean_length": 1040.1484375, "completions/mean_terminated_length": 660.5633544921875, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 8.06530612244898, "grad_norm": 0.13730108737945557, "learning_rate": 1e-06, "loss": -0.0416, "num_tokens": 512762374.0, "reward": 0.6484375, "reward_std": 0.14564156532287598, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 863 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2843.0, "completions/mean_length": 981.15185546875, "completions/mean_terminated_length": 646.1804809570312, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 8.07463556851312, "grad_norm": 0.1306883692741394, "learning_rate": 1e-06, "loss": -0.0273, "num_tokens": 513371022.0, "reward": 0.6863839626312256, "reward_std": 0.12989820539951324, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422144770622253, "step": 864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3030.0, "completions/mean_length": 1080.7410888671875, "completions/mean_terminated_length": 605.4677124023438, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 8.08396501457726, "grad_norm": 0.1641305387020111, "learning_rate": 1e-06, "loss": -0.0549, "num_tokens": 513937078.0, "reward": 0.6227678656578064, "reward_std": 0.16386516392230988, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644899368286, "step": 865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4046.0, "completions/mean_length": 916.4085083007812, "completions/mean_terminated_length": 600.4000244140625, "completions/min_length": 194.0, "completions/min_terminated_length": 194.0, "epoch": 8.093294460641399, "grad_norm": 0.16136740148067474, "learning_rate": 1e-06, "loss": -0.0102, "num_tokens": 514512044.0, "reward": 0.65625, "reward_std": 0.15721426904201508, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 866 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3194.0, "completions/mean_length": 1137.96875, "completions/mean_terminated_length": 662.8445434570312, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 8.102623906705539, "grad_norm": 0.1663280725479126, "learning_rate": 1e-06, "loss": -0.0662, "num_tokens": 515104464.0, "reward": 0.6261160969734192, "reward_std": 0.17161037027835846, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410362005233765, "step": 867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3849.0, "completions/mean_length": 1049.0, "completions/mean_terminated_length": 640.1620483398438, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 8.11195335276968, "grad_norm": 0.1567470133304596, "learning_rate": 1e-06, "loss": -0.0548, "num_tokens": 515707992.0, "reward": 0.6752232313156128, "reward_std": 0.17156578600406647, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 868 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3279.0, "completions/mean_length": 955.7210083007812, "completions/mean_terminated_length": 639.3783569335938, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 8.12128279883382, "grad_norm": 0.13543248176574707, "learning_rate": 1e-06, "loss": -0.0483, "num_tokens": 516310606.0, "reward": 0.6696428656578064, "reward_std": 0.16514535248279572, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 869 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3671.0, "completions/mean_length": 1006.14404296875, "completions/mean_terminated_length": 631.030029296875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 8.130612244897959, "grad_norm": 0.14321519434452057, "learning_rate": 1e-06, "loss": -0.0334, "num_tokens": 516900183.0, "reward": 0.6026785969734192, "reward_std": 0.15312324464321136, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 870 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 1002.904052734375, "completions/mean_terminated_length": 618.6925659179688, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 8.139941690962099, "grad_norm": 0.140425443649292, "learning_rate": 1e-06, "loss": -0.0255, "num_tokens": 517488593.0, "reward": 0.6584821939468384, "reward_std": 0.12050722539424896, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3419.0, "completions/mean_length": 902.1406860351562, "completions/mean_terminated_length": 618.845703125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 8.14927113702624, "grad_norm": 0.135789155960083, "learning_rate": 1e-06, "loss": -0.0351, "num_tokens": 518094303.0, "reward": 0.6819196939468384, "reward_std": 0.13771162927150726, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3545.0, "completions/mean_length": 953.4230346679688, "completions/mean_terminated_length": 611.162109375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 8.15860058309038, "grad_norm": 0.1614873856306076, "learning_rate": 1e-06, "loss": -0.0537, "num_tokens": 518680834.0, "reward": 0.6975446939468384, "reward_std": 0.17351828515529633, "rewards/verify_math_reward/mean": 0.6975446343421936, "rewards/verify_math_reward/std": 0.45957788825035095, "step": 873 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3688.0, "completions/mean_length": 1118.2332763671875, "completions/mean_terminated_length": 675.3859252929688, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 8.167930029154519, "grad_norm": 0.16200335323810577, "learning_rate": 1e-06, "loss": -0.1006, "num_tokens": 519300195.0, "reward": 0.6361607313156128, "reward_std": 0.18185940384864807, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4001.0, "completions/mean_length": 974.4152221679688, "completions/mean_terminated_length": 595.4493408203125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 8.177259475218658, "grad_norm": 0.1391468197107315, "learning_rate": 1e-06, "loss": -0.0429, "num_tokens": 519856455.0, "reward": 0.6830357313156128, "reward_std": 0.13981597125530243, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 875 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3661.0, "completions/mean_length": 950.3973388671875, "completions/mean_terminated_length": 559.6637573242188, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 8.186588921282798, "grad_norm": 0.12101931124925613, "learning_rate": 1e-06, "loss": -0.0314, "num_tokens": 520395035.0, "reward": 0.7031250596046448, "reward_std": 0.09866905957460403, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 876 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3659.0, "completions/mean_length": 977.36279296875, "completions/mean_terminated_length": 620.5037231445312, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 8.19591836734694, "grad_norm": 0.13639773428440094, "learning_rate": 1e-06, "loss": -0.0493, "num_tokens": 520993640.0, "reward": 0.6629464626312256, "reward_std": 0.1573241800069809, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1551339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3126.0, "completions/mean_length": 1146.048095703125, "completions/mean_terminated_length": 604.379150390625, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 8.205247813411079, "grad_norm": 0.1504117250442505, "learning_rate": 1e-06, "loss": -0.0784, "num_tokens": 521544475.0, "reward": 0.6395089626312256, "reward_std": 0.14917626976966858, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 878 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3926.0, "completions/mean_length": 952.1172485351562, "completions/mean_terminated_length": 618.322265625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 8.214577259475218, "grad_norm": 0.14028151333332062, "learning_rate": 1e-06, "loss": -0.0182, "num_tokens": 522137692.0, "reward": 0.676339328289032, "reward_std": 0.12715697288513184, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 879 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3499.0, "completions/mean_length": 1094.4888916015625, "completions/mean_terminated_length": 678.777587890625, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 8.223906705539358, "grad_norm": 0.14301654696464539, "learning_rate": 1e-06, "loss": -0.0583, "num_tokens": 522761042.0, "reward": 0.6037946939468384, "reward_std": 0.16991718113422394, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938119411468506, "step": 880 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3660.0, "completions/mean_length": 1016.747802734375, "completions/mean_terminated_length": 594.71826171875, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 8.2332361516035, "grad_norm": 0.14127644896507263, "learning_rate": 1e-06, "loss": -0.0644, "num_tokens": 523325104.0, "reward": 0.6462053656578064, "reward_std": 0.16901704668998718, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 881 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3782.0, "completions/mean_length": 1095.1585693359375, "completions/mean_terminated_length": 631.1107788085938, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 8.242565597667639, "grad_norm": 0.15452386438846588, "learning_rate": 1e-06, "loss": -0.0646, "num_tokens": 523918358.0, "reward": 0.6205357313156128, "reward_std": 0.15357083082199097, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 882 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2608.0, "completions/mean_length": 1002.552490234375, "completions/mean_terminated_length": 605.1574096679688, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 8.251895043731778, "grad_norm": 0.14762280881404877, "learning_rate": 1e-06, "loss": -0.0523, "num_tokens": 524488565.0, "reward": 0.6383928656578064, "reward_std": 0.14203837513923645, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 883 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3903.0, "completions/mean_length": 982.7678833007812, "completions/mean_terminated_length": 626.52734375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 8.261224489795918, "grad_norm": 0.1296025514602661, "learning_rate": 1e-06, "loss": -0.0472, "num_tokens": 525075093.0, "reward": 0.6618303656578064, "reward_std": 0.11712156236171722, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 884 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3271.0, "completions/mean_length": 1014.65966796875, "completions/mean_terminated_length": 574.4680786132812, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 8.270553935860057, "grad_norm": 0.1451510339975357, "learning_rate": 1e-06, "loss": -0.0576, "num_tokens": 525622916.0, "reward": 0.6696428656578064, "reward_std": 0.1388377547264099, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 885 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2806.0, "completions/mean_length": 1030.2757568359375, "completions/mean_terminated_length": 605.6708984375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 8.279883381924199, "grad_norm": 0.16212977468967438, "learning_rate": 1e-06, "loss": -0.0441, "num_tokens": 526190363.0, "reward": 0.6383928656578064, "reward_std": 0.14740821719169617, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 886 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2863.0, "completions/mean_length": 972.911865234375, "completions/mean_terminated_length": 606.8641357421875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 8.289212827988338, "grad_norm": 0.15837009251117706, "learning_rate": 1e-06, "loss": -0.0716, "num_tokens": 526761604.0, "reward": 0.6819196939468384, "reward_std": 0.17513476312160492, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 887 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3496.0, "completions/mean_length": 937.294677734375, "completions/mean_terminated_length": 623.3619384765625, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 8.298542274052478, "grad_norm": 0.13528120517730713, "learning_rate": 1e-06, "loss": -0.0371, "num_tokens": 527359644.0, "reward": 0.6595982313156128, "reward_std": 0.13079974055290222, "rewards/verify_math_reward/mean": 0.6595982313156128, "rewards/verify_math_reward/std": 0.4741089344024658, "step": 888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2403.0, "completions/mean_length": 941.3281860351562, "completions/mean_terminated_length": 584.7130126953125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 8.307871720116617, "grad_norm": 0.1483733206987381, "learning_rate": 1e-06, "loss": -0.0509, "num_tokens": 527916826.0, "reward": 0.7254464626312256, "reward_std": 0.15056565403938293, "rewards/verify_math_reward/mean": 0.7254464030265808, "rewards/verify_math_reward/std": 0.4465382993221283, "step": 889 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2832.0, "completions/mean_length": 1013.2500610351562, "completions/mean_terminated_length": 604.035400390625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 8.317201166180759, "grad_norm": 0.1543199121952057, "learning_rate": 1e-06, "loss": -0.036, "num_tokens": 528488210.0, "reward": 0.6707589626312256, "reward_std": 0.14376434683799744, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 890 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3425.0, "completions/mean_length": 1051.4676513671875, "completions/mean_terminated_length": 620.9668579101562, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 8.326530612244898, "grad_norm": 0.14981353282928467, "learning_rate": 1e-06, "loss": -0.0488, "num_tokens": 529071613.0, "reward": 0.640625, "reward_std": 0.1513124257326126, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 891 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3392.0, "completions/mean_length": 965.3270263671875, "completions/mean_terminated_length": 576.4479370117188, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 8.335860058309038, "grad_norm": 0.14016583561897278, "learning_rate": 1e-06, "loss": -0.0434, "num_tokens": 529619026.0, "reward": 0.621651828289032, "reward_std": 0.13049665093421936, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.485245943069458, "step": 892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2651.0, "completions/mean_length": 917.7388916015625, "completions/mean_terminated_length": 597.5700073242188, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 8.345189504373177, "grad_norm": 0.15088114142417908, "learning_rate": 1e-06, "loss": -0.0448, "num_tokens": 530186584.0, "reward": 0.6640625, "reward_std": 0.16758601367473602, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3594.0, "completions/mean_length": 989.333740234375, "completions/mean_terminated_length": 563.5469360351562, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 8.354518950437317, "grad_norm": 0.16835367679595947, "learning_rate": 1e-06, "loss": -0.0282, "num_tokens": 530724155.0, "reward": 0.65625, "reward_std": 0.14203977584838867, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 894 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3645.0, "completions/mean_length": 1058.7076416015625, "completions/mean_terminated_length": 638.0406494140625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 8.363848396501458, "grad_norm": 0.14951688051223755, "learning_rate": 1e-06, "loss": -0.0692, "num_tokens": 531309765.0, "reward": 0.6171875, "reward_std": 0.16728290915489197, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 895 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2943.0, "completions/mean_length": 1047.4296875, "completions/mean_terminated_length": 616.3579711914062, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 8.373177842565598, "grad_norm": 0.1584387719631195, "learning_rate": 1e-06, "loss": -0.0682, "num_tokens": 531876990.0, "reward": 0.6707589626312256, "reward_std": 0.15289753675460815, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2528.0, "completions/mean_length": 884.5670166015625, "completions/mean_terminated_length": 534.8069458007812, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 8.382507288629737, "grad_norm": 0.15818488597869873, "learning_rate": 1e-06, "loss": -0.05, "num_tokens": 532409834.0, "reward": 0.676339328289032, "reward_std": 0.15349414944648743, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 897 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3117.0, "completions/mean_length": 955.9386596679688, "completions/mean_terminated_length": 587.9014892578125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 8.391836734693877, "grad_norm": 0.16838958859443665, "learning_rate": 1e-06, "loss": -0.0675, "num_tokens": 532958347.0, "reward": 0.6964285969734192, "reward_std": 0.1521807461977005, "rewards/verify_math_reward/mean": 0.6964285969734192, "rewards/verify_math_reward/std": 0.4600565433502197, "step": 898 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3049.0, "completions/mean_length": 889.87060546875, "completions/mean_terminated_length": 575.5441284179688, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 8.401166180758018, "grad_norm": 0.1707538217306137, "learning_rate": 1e-06, "loss": -0.0181, "num_tokens": 533520095.0, "reward": 0.7008928656578064, "reward_std": 0.16067594289779663, "rewards/verify_math_reward/mean": 0.7008928656578064, "rewards/verify_math_reward/std": 0.458122581243515, "step": 899 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3994.0, "completions/mean_length": 896.6998291015625, "completions/mean_terminated_length": 557.02099609375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 8.410495626822158, "grad_norm": 0.16008999943733215, "learning_rate": 1e-06, "loss": -0.0336, "num_tokens": 534051682.0, "reward": 0.723214328289032, "reward_std": 0.16134603321552277, "rewards/verify_math_reward/mean": 0.7232142686843872, "rewards/verify_math_reward/std": 0.44765952229499817, "step": 900 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3946.0, "completions/mean_length": 1003.747802734375, "completions/mean_terminated_length": 602.10595703125, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 8.419825072886297, "grad_norm": 0.1635473221540451, "learning_rate": 1e-06, "loss": -0.067, "num_tokens": 534612272.0, "reward": 0.6941964626312256, "reward_std": 0.17156507074832916, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.4610042870044708, "step": 901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2206.0, "completions/mean_length": 797.075927734375, "completions/mean_terminated_length": 547.577392578125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 8.429154518950437, "grad_norm": 0.13711099326610565, "learning_rate": 1e-06, "loss": -0.0672, "num_tokens": 535149532.0, "reward": 0.738839328289032, "reward_std": 0.14083515107631683, "rewards/verify_math_reward/mean": 0.7388392686843872, "rewards/verify_math_reward/std": 0.439512699842453, "step": 902 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2247.0, "completions/mean_length": 882.3504638671875, "completions/mean_terminated_length": 562.95703125, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 8.438483965014576, "grad_norm": 0.12448661029338837, "learning_rate": 1e-06, "loss": -0.0397, "num_tokens": 535695774.0, "reward": 0.6964285969734192, "reward_std": 0.11032027006149292, "rewards/verify_math_reward/mean": 0.6964285969734192, "rewards/verify_math_reward/std": 0.4600565731525421, "step": 903 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3531.0, "completions/mean_length": 979.6038208007812, "completions/mean_terminated_length": 644.4660034179688, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 8.447813411078718, "grad_norm": 0.12877033650875092, "learning_rate": 1e-06, "loss": -0.0409, "num_tokens": 536313947.0, "reward": 0.6729910969734192, "reward_std": 0.11580956727266312, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3182.0, "completions/mean_length": 972.34716796875, "completions/mean_terminated_length": 588.7406005859375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 8.457142857142857, "grad_norm": 0.14431960880756378, "learning_rate": 1e-06, "loss": -0.0484, "num_tokens": 536879626.0, "reward": 0.6551339626312256, "reward_std": 0.15484780073165894, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900800228119, "step": 905 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4038.0, "completions/mean_length": 951.5614013671875, "completions/mean_terminated_length": 591.75, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 8.466472303206997, "grad_norm": 0.14249777793884277, "learning_rate": 1e-06, "loss": -0.03, "num_tokens": 537441313.0, "reward": 0.7165178656578064, "reward_std": 0.11535493284463882, "rewards/verify_math_reward/mean": 0.7165178656578064, "rewards/verify_math_reward/std": 0.4509401023387909, "step": 906 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2735.0, "completions/mean_length": 1031.3248291015625, "completions/mean_terminated_length": 597.975830078125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 8.475801749271136, "grad_norm": 0.14390555024147034, "learning_rate": 1e-06, "loss": -0.0502, "num_tokens": 537996172.0, "reward": 0.6462053656578064, "reward_std": 0.14083333313465118, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 907 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3831.0, "completions/mean_length": 1109.84375, "completions/mean_terminated_length": 652.5045166015625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 8.485131195335278, "grad_norm": 0.1546357423067093, "learning_rate": 1e-06, "loss": -0.0697, "num_tokens": 538586400.0, "reward": 0.6495535969734192, "reward_std": 0.15049009025096893, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 908 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3880.0, "completions/mean_length": 847.724365234375, "completions/mean_terminated_length": 597.8569946289062, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 8.494460641399417, "grad_norm": 0.15690870583057404, "learning_rate": 1e-06, "loss": -0.036, "num_tokens": 539169873.0, "reward": 0.6941964626312256, "reward_std": 0.15018586814403534, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.4610042870044708, "step": 909 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3696.0, "completions/mean_length": 960.2567138671875, "completions/mean_terminated_length": 605.7813720703125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 8.503790087463557, "grad_norm": 0.151783287525177, "learning_rate": 1e-06, "loss": -0.0513, "num_tokens": 539757591.0, "reward": 0.6517857313156128, "reward_std": 0.15311436355113983, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 910 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3717.0, "completions/mean_length": 747.2422485351562, "completions/mean_terminated_length": 545.1278076171875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 8.513119533527696, "grad_norm": 0.15444251894950867, "learning_rate": 1e-06, "loss": -0.0229, "num_tokens": 540307080.0, "reward": 0.7109375596046448, "reward_std": 0.12384940683841705, "rewards/verify_math_reward/mean": 0.7109375, "rewards/verify_math_reward/std": 0.45358020067214966, "step": 911 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3878.0, "completions/mean_length": 1106.2132568359375, "completions/mean_terminated_length": 652.7493286132812, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 8.522448979591836, "grad_norm": 0.1587335467338562, "learning_rate": 1e-06, "loss": -0.073, "num_tokens": 540909119.0, "reward": 0.5524553656578064, "reward_std": 0.15424413979053497, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3861.0, "completions/mean_length": 995.3114013671875, "completions/mean_terminated_length": 610.1568603515625, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 8.531778425655977, "grad_norm": 0.14729638397693634, "learning_rate": 1e-06, "loss": -0.0567, "num_tokens": 541480702.0, "reward": 0.6484375, "reward_std": 0.1466161608695984, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 913 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2520.0, "completions/mean_length": 1001.39404296875, "completions/mean_terminated_length": 536.607177734375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 8.541107871720117, "grad_norm": 0.15659281611442566, "learning_rate": 1e-06, "loss": -0.0842, "num_tokens": 541992983.0, "reward": 0.6796875596046448, "reward_std": 0.13008618354797363, "rewards/verify_math_reward/mean": 0.6796875, "rewards/verify_math_reward/std": 0.4668572247028351, "step": 914 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2784.0, "completions/mean_length": 837.9933471679688, "completions/mean_terminated_length": 587.37744140625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 8.550437317784256, "grad_norm": 0.14385898411273956, "learning_rate": 1e-06, "loss": -0.0462, "num_tokens": 542570257.0, "reward": 0.7131696939468384, "reward_std": 0.14823377132415771, "rewards/verify_math_reward/mean": 0.7131696343421936, "rewards/verify_math_reward/std": 0.4525342881679535, "step": 915 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3764.0, "completions/mean_length": 1045.6820068359375, "completions/mean_terminated_length": 705.07568359375, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 8.559766763848396, "grad_norm": 0.12489776313304901, "learning_rate": 1e-06, "loss": -0.0436, "num_tokens": 543237564.0, "reward": 0.582589328289032, "reward_std": 0.12140624225139618, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 916 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3977.0, "completions/mean_length": 1061.5279541015625, "completions/mean_terminated_length": 605.7728271484375, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 8.569096209912537, "grad_norm": 0.15697191655635834, "learning_rate": 1e-06, "loss": -0.0465, "num_tokens": 543797869.0, "reward": 0.6339285969734192, "reward_std": 0.15473198890686035, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 917 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3649.0, "completions/mean_length": 945.2600708007812, "completions/mean_terminated_length": 589.0894165039062, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 8.578425655976677, "grad_norm": 0.14893698692321777, "learning_rate": 1e-06, "loss": -0.0396, "num_tokens": 544366062.0, "reward": 0.645089328289032, "reward_std": 0.15331122279167175, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3401.0, "completions/mean_length": 813.224365234375, "completions/mean_terminated_length": 560.703125, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 8.587755102040816, "grad_norm": 0.16182899475097656, "learning_rate": 1e-06, "loss": -0.0379, "num_tokens": 544928087.0, "reward": 0.6964285969734192, "reward_std": 0.16401740908622742, "rewards/verify_math_reward/mean": 0.6964285969734192, "rewards/verify_math_reward/std": 0.4600566029548645, "step": 919 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3169.0, "completions/mean_length": 995.1763916015625, "completions/mean_terminated_length": 670.1824951171875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 8.597084548104956, "grad_norm": 0.14866846799850464, "learning_rate": 1e-06, "loss": -0.0434, "num_tokens": 545554333.0, "reward": 0.606026828289032, "reward_std": 0.1726934313774109, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890191316604614, "step": 920 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2508.0, "completions/mean_length": 943.3449096679688, "completions/mean_terminated_length": 565.0262451171875, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 8.606413994169095, "grad_norm": 0.1682433933019638, "learning_rate": 1e-06, "loss": -0.0718, "num_tokens": 546089034.0, "reward": 0.6941964626312256, "reward_std": 0.16533967852592468, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.4610042870044708, "step": 921 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2486.0, "completions/mean_length": 928.1741333007812, "completions/mean_terminated_length": 626.1076049804688, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 8.615743440233237, "grad_norm": 0.15417876839637756, "learning_rate": 1e-06, "loss": -0.0661, "num_tokens": 546685302.0, "reward": 0.6383928656578064, "reward_std": 0.1768607497215271, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 922 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2817.0, "completions/mean_length": 1046.7723388671875, "completions/mean_terminated_length": 593.2974243164062, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 8.625072886297376, "grad_norm": 0.15682968497276306, "learning_rate": 1e-06, "loss": -0.0738, "num_tokens": 547244258.0, "reward": 0.6718750596046448, "reward_std": 0.17427602410316467, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 923 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3816.0, "completions/mean_length": 911.6105346679688, "completions/mean_terminated_length": 564.7957763671875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 8.634402332361516, "grad_norm": 0.1625637263059616, "learning_rate": 1e-06, "loss": -0.0684, "num_tokens": 547792933.0, "reward": 0.6629464626312256, "reward_std": 0.1560874581336975, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 924 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3288.0, "completions/mean_length": 1000.6172485351562, "completions/mean_terminated_length": 607.3673095703125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 8.643731778425655, "grad_norm": 0.16285686194896698, "learning_rate": 1e-06, "loss": -0.0424, "num_tokens": 548367766.0, "reward": 0.6462053656578064, "reward_std": 0.1533093899488449, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3667.0, "completions/mean_length": 969.62841796875, "completions/mean_terminated_length": 590.0813598632812, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 8.653061224489797, "grad_norm": 0.1668129414319992, "learning_rate": 1e-06, "loss": -0.0534, "num_tokens": 548927121.0, "reward": 0.676339328289032, "reward_std": 0.17145879566669464, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 926 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2468.0, "completions/mean_length": 864.677490234375, "completions/mean_terminated_length": 539.1633911132812, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 8.662390670553936, "grad_norm": 0.15278904139995575, "learning_rate": 1e-06, "loss": -0.0245, "num_tokens": 549457216.0, "reward": 0.6629464626312256, "reward_std": 0.123512402176857, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 927 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2918.0, "completions/mean_length": 1068.4364013671875, "completions/mean_terminated_length": 604.7554931640625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 8.671720116618076, "grad_norm": 0.14968419075012207, "learning_rate": 1e-06, "loss": -0.0584, "num_tokens": 550016215.0, "reward": 0.6171875, "reward_std": 0.15240898728370667, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3248.0, "completions/mean_length": 992.3895263671875, "completions/mean_terminated_length": 602.4887084960938, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 8.681049562682215, "grad_norm": 0.14219050109386444, "learning_rate": 1e-06, "loss": -0.0423, "num_tokens": 550584492.0, "reward": 0.6707589626312256, "reward_std": 0.14894986152648926, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 929 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4003.0, "completions/mean_length": 1016.3058471679688, "completions/mean_terminated_length": 585.3053588867188, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 8.690379008746355, "grad_norm": 0.146462082862854, "learning_rate": 1e-06, "loss": -0.105, "num_tokens": 551130854.0, "reward": 0.691964328289032, "reward_std": 0.1449248045682907, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 930 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3965.0, "completions/mean_length": 1116.10498046875, "completions/mean_terminated_length": 659.724609375, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 8.699708454810496, "grad_norm": 0.16851374506950378, "learning_rate": 1e-06, "loss": -0.0629, "num_tokens": 551736900.0, "reward": 0.6428571939468384, "reward_std": 0.18144892156124115, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 931 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3786.0, "completions/mean_length": 1011.4308471679688, "completions/mean_terminated_length": 615.17626953125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 8.709037900874636, "grad_norm": 0.14060620963573456, "learning_rate": 1e-06, "loss": -0.0586, "num_tokens": 552318958.0, "reward": 0.6183035969734192, "reward_std": 0.12877096235752106, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3944.0, "completions/mean_length": 1045.56591796875, "completions/mean_terminated_length": 645.0037841796875, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 8.718367346938775, "grad_norm": 0.14821182191371918, "learning_rate": 1e-06, "loss": -0.0495, "num_tokens": 552918489.0, "reward": 0.6618303656578064, "reward_std": 0.14797163009643555, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3940.0, "completions/mean_length": 1086.122802734375, "completions/mean_terminated_length": 620.6777954101562, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 8.727696793002915, "grad_norm": 0.15176919102668762, "learning_rate": 1e-06, "loss": -0.0384, "num_tokens": 553486303.0, "reward": 0.59375, "reward_std": 0.1425604224205017, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 934 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3582.0, "completions/mean_length": 999.4006958007812, "completions/mean_terminated_length": 566.0343627929688, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 8.737026239067056, "grad_norm": 0.1681135594844818, "learning_rate": 1e-06, "loss": -0.0458, "num_tokens": 554010670.0, "reward": 0.676339328289032, "reward_std": 0.138991117477417, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 935 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2998.0, "completions/mean_length": 901.6607666015625, "completions/mean_terminated_length": 553.7623901367188, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 8.746355685131196, "grad_norm": 0.17645412683486938, "learning_rate": 1e-06, "loss": -0.0575, "num_tokens": 554546222.0, "reward": 0.660714328289032, "reward_std": 0.1832929402589798, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 936 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3279.0, "completions/mean_length": 902.8873291015625, "completions/mean_terminated_length": 585.5349731445312, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 8.755685131195335, "grad_norm": 0.15916696190834045, "learning_rate": 1e-06, "loss": -0.0621, "num_tokens": 555116649.0, "reward": 0.7031250596046448, "reward_std": 0.14684300124645233, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 937 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3678.0, "completions/mean_length": 1073.328125, "completions/mean_terminated_length": 623.8026123046875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 8.765014577259475, "grad_norm": 0.16375704109668732, "learning_rate": 1e-06, "loss": -0.0714, "num_tokens": 555689887.0, "reward": 0.6618303656578064, "reward_std": 0.17212960124015808, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 938 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3306.0, "completions/mean_length": 778.216552734375, "completions/mean_terminated_length": 523.00244140625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 8.774344023323614, "grad_norm": 0.14863629639148712, "learning_rate": 1e-06, "loss": -0.046, "num_tokens": 556209001.0, "reward": 0.7366071939468384, "reward_std": 0.12143944203853607, "rewards/verify_math_reward/mean": 0.7366071343421936, "rewards/verify_math_reward/std": 0.44071969389915466, "step": 939 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3990.0, "completions/mean_length": 1018.8873291015625, "completions/mean_terminated_length": 623.5906372070312, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 8.783673469387756, "grad_norm": 0.13999132812023163, "learning_rate": 1e-06, "loss": -0.0294, "num_tokens": 556797812.0, "reward": 0.652901828289032, "reward_std": 0.1420711725950241, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631320357322693, "step": 940 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2837.0, "completions/mean_length": 868.8917846679688, "completions/mean_terminated_length": 552.5086059570312, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 8.793002915451895, "grad_norm": 0.13161882758140564, "learning_rate": 1e-06, "loss": -0.0465, "num_tokens": 557327723.0, "reward": 0.7131696939468384, "reward_std": 0.12677791714668274, "rewards/verify_math_reward/mean": 0.7131696343421936, "rewards/verify_math_reward/std": 0.4525342881679535, "step": 941 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3773.0, "completions/mean_length": 1007.5547485351562, "completions/mean_terminated_length": 570.8446044921875, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 8.802332361516035, "grad_norm": 0.13785584270954132, "learning_rate": 1e-06, "loss": -0.0311, "num_tokens": 557860932.0, "reward": 0.65625, "reward_std": 0.12317357957363129, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 942 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2927.0, "completions/mean_length": 923.950927734375, "completions/mean_terminated_length": 578.4801635742188, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 8.811661807580174, "grad_norm": 0.14556001126766205, "learning_rate": 1e-06, "loss": -0.0637, "num_tokens": 558418480.0, "reward": 0.6930803656578064, "reward_std": 0.1663813591003418, "rewards/verify_math_reward/mean": 0.6930803656578064, "rewards/verify_math_reward/std": 0.46147334575653076, "step": 943 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4031.0, "completions/mean_length": 965.3951416015625, "completions/mean_terminated_length": 637.2799072265625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 8.820991253644316, "grad_norm": 0.13868778944015503, "learning_rate": 1e-06, "loss": -0.0572, "num_tokens": 559020906.0, "reward": 0.6729910969734192, "reward_std": 0.12438002973794937, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4022.0, "completions/mean_length": 1029.735595703125, "completions/mean_terminated_length": 613.9049682617188, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 8.830320699708455, "grad_norm": 0.15609444677829742, "learning_rate": 1e-06, "loss": -0.0811, "num_tokens": 559589149.0, "reward": 0.6339285969734192, "reward_std": 0.16314797103405, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3523.0, "completions/mean_length": 939.0480346679688, "completions/mean_terminated_length": 569.0311889648438, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 8.839650145772595, "grad_norm": 0.15562525391578674, "learning_rate": 1e-06, "loss": -0.0571, "num_tokens": 560129256.0, "reward": 0.6819196939468384, "reward_std": 0.14846017956733704, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 946 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2939.0, "completions/mean_length": 971.0000610351562, "completions/mean_terminated_length": 587.2280883789062, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 8.848979591836734, "grad_norm": 0.1636345386505127, "learning_rate": 1e-06, "loss": -0.0768, "num_tokens": 560680384.0, "reward": 0.6439732313156128, "reward_std": 0.1599937528371811, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 947 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3564.0, "completions/mean_length": 981.5234985351562, "completions/mean_terminated_length": 663.5633544921875, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 8.858309037900874, "grad_norm": 0.11569786816835403, "learning_rate": 1e-06, "loss": -0.0453, "num_tokens": 561309981.0, "reward": 0.6662946939468384, "reward_std": 0.13121412694454193, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179922461509705, "step": 948 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3260.0, "completions/mean_length": 998.7254638671875, "completions/mean_terminated_length": 587.5828247070312, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 8.867638483965015, "grad_norm": 0.14304448664188385, "learning_rate": 1e-06, "loss": -0.0717, "num_tokens": 561861527.0, "reward": 0.676339328289032, "reward_std": 0.14444763958454132, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335687637329, "step": 949 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2845.0, "completions/mean_length": 1089.8560791015625, "completions/mean_terminated_length": 584.2594604492188, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 8.876967930029155, "grad_norm": 0.1965818852186203, "learning_rate": 1e-06, "loss": -0.0607, "num_tokens": 562399710.0, "reward": 0.6540178656578064, "reward_std": 0.14687760174274445, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 950 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3841.0, "completions/mean_length": 1047.482177734375, "completions/mean_terminated_length": 585.1105346679688, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 8.886297376093294, "grad_norm": 0.15234307944774628, "learning_rate": 1e-06, "loss": -0.0609, "num_tokens": 562948966.0, "reward": 0.6328125, "reward_std": 0.13083434104919434, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 951 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3128.0, "completions/mean_length": 988.5558471679688, "completions/mean_terminated_length": 602.5620727539062, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 8.895626822157434, "grad_norm": 0.13719192147254944, "learning_rate": 1e-06, "loss": -0.0582, "num_tokens": 563519256.0, "reward": 0.6417410969734192, "reward_std": 0.1342613846063614, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975656390190125, "step": 952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2888.0, "completions/mean_length": 960.536865234375, "completions/mean_terminated_length": 579.8861083984375, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 8.904956268221575, "grad_norm": 0.20323546230793, "learning_rate": 1e-06, "loss": -0.0396, "num_tokens": 564073761.0, "reward": 0.645089328289032, "reward_std": 0.19314108788967133, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 953 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2772.0, "completions/mean_length": 900.1730346679688, "completions/mean_terminated_length": 582.5509033203125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 8.914285714285715, "grad_norm": 0.17294132709503174, "learning_rate": 1e-06, "loss": -0.0444, "num_tokens": 564639772.0, "reward": 0.6975446939468384, "reward_std": 0.17066673934459686, "rewards/verify_math_reward/mean": 0.6975446343421936, "rewards/verify_math_reward/std": 0.45957788825035095, "step": 954 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4063.0, "completions/mean_length": 971.8906860351562, "completions/mean_terminated_length": 614.4054565429688, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 8.923615160349854, "grad_norm": 0.14424920082092285, "learning_rate": 1e-06, "loss": -0.0544, "num_tokens": 565217474.0, "reward": 0.7109375596046448, "reward_std": 0.13940481841564178, "rewards/verify_math_reward/mean": 0.7109375, "rewards/verify_math_reward/std": 0.45358020067214966, "step": 955 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4051.0, "completions/mean_length": 1035.2757568359375, "completions/mean_terminated_length": 606.9300537109375, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 8.932944606413994, "grad_norm": 0.1515893191099167, "learning_rate": 1e-06, "loss": -0.0562, "num_tokens": 565784753.0, "reward": 0.6439732313156128, "reward_std": 0.14887316524982452, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3463.0, "completions/mean_length": 1158.1328125, "completions/mean_terminated_length": 614.0833129882812, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 8.942274052478133, "grad_norm": 0.16238532960414886, "learning_rate": 1e-06, "loss": -0.0726, "num_tokens": 566340680.0, "reward": 0.5959821939468384, "reward_std": 0.13380561769008636, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 957 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3308.0, "completions/mean_length": 1034.575927734375, "completions/mean_terminated_length": 632.5706787109375, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 8.951603498542275, "grad_norm": 0.16529758274555206, "learning_rate": 1e-06, "loss": -0.0535, "num_tokens": 566936924.0, "reward": 0.6383928656578064, "reward_std": 0.1720893532037735, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 958 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3321.0, "completions/mean_length": 1027.1082763671875, "completions/mean_terminated_length": 588.6951293945312, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 8.960932944606414, "grad_norm": 0.14592179656028748, "learning_rate": 1e-06, "loss": -0.0565, "num_tokens": 567489669.0, "reward": 0.6964285969734192, "reward_std": 0.13425210118293762, "rewards/verify_math_reward/mean": 0.6964285969734192, "rewards/verify_math_reward/std": 0.4600565731525421, "step": 959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3533.0, "completions/mean_length": 1098.482177734375, "completions/mean_terminated_length": 670.2653198242188, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 8.970262390670554, "grad_norm": 0.14832952618598938, "learning_rate": 1e-06, "loss": -0.0602, "num_tokens": 568111877.0, "reward": 0.676339328289032, "reward_std": 0.16183707118034363, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2840.0, "completions/mean_length": 917.3873291015625, "completions/mean_terminated_length": 601.47607421875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 8.979591836734693, "grad_norm": 0.14388814568519592, "learning_rate": 1e-06, "loss": -0.0588, "num_tokens": 568690640.0, "reward": 0.707589328289032, "reward_std": 0.1379055380821228, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2771.0, "completions/mean_length": 1085.079345703125, "completions/mean_terminated_length": 623.947265625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 8.988921282798835, "grad_norm": 0.15324236452579498, "learning_rate": 1e-06, "loss": -0.0331, "num_tokens": 569272719.0, "reward": 0.6339285969734192, "reward_std": 0.15709525346755981, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 962 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10795454545454541, "completions/max_length": 4096.0, "completions/max_terminated_length": 3484.0, "completions/mean_length": 1034.82958984375, "completions/mean_terminated_length": 664.3694458007812, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 8.998250728862974, "grad_norm": 0.14960594475269318, "learning_rate": 1e-06, "loss": -0.0857, "num_tokens": 569898578.0, "reward": 0.6127232313156128, "reward_std": 0.15454721450805664, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 963 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3632.0, "completions/mean_length": 1084.6842041015625, "completions/mean_terminated_length": 697.8400268554688, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 9.00932944606414, "grad_norm": 0.13798940181732178, "learning_rate": 1e-06, "loss": -0.0505, "num_tokens": 570544871.0, "reward": 0.6640625, "reward_std": 0.1573990434408188, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3912.0, "completions/mean_length": 1014.8527221679688, "completions/mean_terminated_length": 565.682861328125, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 9.018658892128279, "grad_norm": 0.1554582715034485, "learning_rate": 1e-06, "loss": -0.0397, "num_tokens": 571073219.0, "reward": 0.6897321939468384, "reward_std": 0.11239181458950043, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.4628615975379944, "step": 965 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3055.0, "completions/mean_length": 910.2723388671875, "completions/mean_terminated_length": 593.6539916992188, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 9.02798833819242, "grad_norm": 0.16948619484901428, "learning_rate": 1e-06, "loss": -0.0649, "num_tokens": 571653719.0, "reward": 0.6819196939468384, "reward_std": 0.15529540181159973, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 966 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3826.0, "completions/mean_length": 1083.469970703125, "completions/mean_terminated_length": 617.6146850585938, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 9.03731778425656, "grad_norm": 0.15437005460262299, "learning_rate": 1e-06, "loss": -0.058, "num_tokens": 572223388.0, "reward": 0.6428571939468384, "reward_std": 0.15785479545593262, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.47942501306533813, "step": 967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3944.0, "completions/mean_length": 1079.0848388671875, "completions/mean_terminated_length": 617.0347290039062, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 9.0466472303207, "grad_norm": 0.16558527946472168, "learning_rate": 1e-06, "loss": -0.0653, "num_tokens": 572808672.0, "reward": 0.6127232313156128, "reward_std": 0.16784563660621643, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3676.0, "completions/mean_length": 991.9810791015625, "completions/mean_terminated_length": 619.4987182617188, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 9.055976676384839, "grad_norm": 0.1664285659790039, "learning_rate": 1e-06, "loss": -0.0499, "num_tokens": 573388911.0, "reward": 0.707589328289032, "reward_std": 0.16308125853538513, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 969 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2918.0, "completions/mean_length": 912.30810546875, "completions/mean_terminated_length": 556.8088989257812, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 9.06530612244898, "grad_norm": 0.1441843956708908, "learning_rate": 1e-06, "loss": -0.0377, "num_tokens": 573922227.0, "reward": 0.7053571939468384, "reward_std": 0.13760104775428772, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613667368888855, "step": 970 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3922.0, "completions/mean_length": 1120.3148193359375, "completions/mean_terminated_length": 628.8816528320312, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 9.07463556851312, "grad_norm": 0.1594502031803131, "learning_rate": 1e-06, "loss": -0.0444, "num_tokens": 574498245.0, "reward": 0.6339285969734192, "reward_std": 0.1505335569381714, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 971 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3337.0, "completions/mean_length": 918.4766235351562, "completions/mean_terminated_length": 598.382080078125, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 9.08396501457726, "grad_norm": 0.13108834624290466, "learning_rate": 1e-06, "loss": -0.0321, "num_tokens": 575068248.0, "reward": 0.7131696939468384, "reward_std": 0.1353137493133545, "rewards/verify_math_reward/mean": 0.7131696343421936, "rewards/verify_math_reward/std": 0.4525342881679535, "step": 972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2631.0, "completions/mean_length": 938.8582763671875, "completions/mean_terminated_length": 616.5424194335938, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 9.093294460641399, "grad_norm": 0.1505901962518692, "learning_rate": 1e-06, "loss": -0.0466, "num_tokens": 575657521.0, "reward": 0.6930803656578064, "reward_std": 0.15691189467906952, "rewards/verify_math_reward/mean": 0.6930803656578064, "rewards/verify_math_reward/std": 0.46147334575653076, "step": 973 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2541.0, "completions/mean_length": 1112.43310546875, "completions/mean_terminated_length": 664.323486328125, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 9.102623906705539, "grad_norm": 0.14574332535266876, "learning_rate": 1e-06, "loss": -0.0251, "num_tokens": 576255141.0, "reward": 0.6383928656578064, "reward_std": 0.1285124570131302, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 974 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3185.0, "completions/mean_length": 1083.8695068359375, "completions/mean_terminated_length": 622.5521240234375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 9.11195335276968, "grad_norm": 0.1633671373128891, "learning_rate": 1e-06, "loss": -0.0821, "num_tokens": 576844376.0, "reward": 0.6227678656578064, "reward_std": 0.15785479545593262, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644899368286, "step": 975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3928.0, "completions/mean_length": 1111.6507568359375, "completions/mean_terminated_length": 586.8438110351562, "completions/min_length": 194.0, "completions/min_terminated_length": 194.0, "epoch": 9.12128279883382, "grad_norm": 0.14949971437454224, "learning_rate": 1e-06, "loss": -0.0462, "num_tokens": 577383727.0, "reward": 0.6171875, "reward_std": 0.14458921551704407, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3850.0, "completions/mean_length": 1097.341552734375, "completions/mean_terminated_length": 707.8562622070312, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 9.130612244897959, "grad_norm": 0.14869371056556702, "learning_rate": 1e-06, "loss": -0.0469, "num_tokens": 578038009.0, "reward": 0.625, "reward_std": 0.17559193074703217, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3343.0, "completions/mean_length": 1100.8192138671875, "completions/mean_terminated_length": 610.6986694335938, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 9.139941690962099, "grad_norm": 0.14077144861221313, "learning_rate": 1e-06, "loss": -0.0513, "num_tokens": 578597015.0, "reward": 0.6729910969734192, "reward_std": 0.12400025129318237, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 978 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2632.0, "completions/mean_length": 1064.946533203125, "completions/mean_terminated_length": 618.6325073242188, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 9.14927113702624, "grad_norm": 0.15716727077960968, "learning_rate": 1e-06, "loss": -0.0558, "num_tokens": 579174959.0, "reward": 0.6718750596046448, "reward_std": 0.14042328298091888, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3250.0, "completions/mean_length": 1080.828125, "completions/mean_terminated_length": 645.6883544921875, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 9.15860058309038, "grad_norm": 0.17431092262268066, "learning_rate": 1e-06, "loss": -0.0813, "num_tokens": 579771733.0, "reward": 0.6729910969734192, "reward_std": 0.19831563532352448, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 980 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3907.0, "completions/mean_length": 1025.091552734375, "completions/mean_terminated_length": 617.4487915039062, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 9.167930029154519, "grad_norm": 0.15481217205524445, "learning_rate": 1e-06, "loss": -0.072, "num_tokens": 580357559.0, "reward": 0.6462053656578064, "reward_std": 0.1678135246038437, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 981 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2707.0, "completions/mean_length": 874.5625610351562, "completions/mean_terminated_length": 601.559326171875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 9.177259475218658, "grad_norm": 0.14269818365573883, "learning_rate": 1e-06, "loss": -0.0474, "num_tokens": 580933903.0, "reward": 0.6707589626312256, "reward_std": 0.1479741632938385, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3637.0, "completions/mean_length": 839.9397583007812, "completions/mean_terminated_length": 606.2511596679688, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 9.186588921282798, "grad_norm": 0.14914286136627197, "learning_rate": 1e-06, "loss": -0.0626, "num_tokens": 581526081.0, "reward": 0.7522321939468384, "reward_std": 0.16788700222969055, "rewards/verify_math_reward/mean": 0.7522321343421936, "rewards/verify_math_reward/std": 0.4319573938846588, "step": 983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3861.0, "completions/mean_length": 1065.5692138671875, "completions/mean_terminated_length": 637.0624389648438, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 9.19591836734694, "grad_norm": 0.14959770441055298, "learning_rate": 1e-06, "loss": -0.0521, "num_tokens": 582115927.0, "reward": 0.6428571939468384, "reward_std": 0.16266827285289764, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 984 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3236.0, "completions/mean_length": 747.8939819335938, "completions/mean_terminated_length": 583.2329711914062, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 9.205247813411079, "grad_norm": 0.13443413376808167, "learning_rate": 1e-06, "loss": -0.0214, "num_tokens": 582685680.0, "reward": 0.7444196939468384, "reward_std": 0.1287727802991867, "rewards/verify_math_reward/mean": 0.7444196343421936, "rewards/verify_math_reward/std": 0.43643051385879517, "step": 985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2657.0, "completions/mean_length": 821.0814819335938, "completions/mean_terminated_length": 573.3985595703125, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 9.214577259475218, "grad_norm": 0.13425439596176147, "learning_rate": 1e-06, "loss": -0.0545, "num_tokens": 583248905.0, "reward": 0.754464328289032, "reward_std": 0.11914923042058945, "rewards/verify_math_reward/mean": 0.7544642686843872, "rewards/verify_math_reward/std": 0.43064478039741516, "step": 986 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1584821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3953.0, "completions/mean_length": 1207.0413818359375, "completions/mean_terminated_length": 662.9668579101562, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 9.223906705539358, "grad_norm": 0.15486279129981995, "learning_rate": 1e-06, "loss": -0.0709, "num_tokens": 583848238.0, "reward": 0.6160714626312256, "reward_std": 0.1367775946855545, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3059.0, "completions/mean_length": 1071.575927734375, "completions/mean_terminated_length": 683.0478515625, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 9.2332361516035, "grad_norm": 0.14331471920013428, "learning_rate": 1e-06, "loss": -0.062, "num_tokens": 584490458.0, "reward": 0.640625, "reward_std": 0.16308125853538513, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 988 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3276.0, "completions/mean_length": 939.4933471679688, "completions/mean_terminated_length": 578.3009643554688, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 9.242565597667639, "grad_norm": 0.17624999582767487, "learning_rate": 1e-06, "loss": -0.052, "num_tokens": 585039044.0, "reward": 0.707589328289032, "reward_std": 0.14800554513931274, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 989 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4046.0, "completions/mean_length": 986.411865234375, "completions/mean_terminated_length": 600.1517944335938, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 9.251895043731778, "grad_norm": 0.16277343034744263, "learning_rate": 1e-06, "loss": -0.0733, "num_tokens": 585614749.0, "reward": 0.6662946939468384, "reward_std": 0.1639414280653, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 990 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2801.0, "completions/mean_length": 942.0614013671875, "completions/mean_terminated_length": 594.229248046875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 9.261224489795918, "grad_norm": 0.1278885006904602, "learning_rate": 1e-06, "loss": -0.02, "num_tokens": 586196420.0, "reward": 0.6350446939468384, "reward_std": 0.1173504963517189, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 991 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3554.0, "completions/mean_length": 1033.33935546875, "completions/mean_terminated_length": 652.9083862304688, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 9.270553935860057, "grad_norm": 0.16274511814117432, "learning_rate": 1e-06, "loss": -0.0916, "num_tokens": 586803124.0, "reward": 0.637276828289032, "reward_std": 0.1754392832517624, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3278.0, "completions/mean_length": 1030.6138916015625, "completions/mean_terminated_length": 632.4615478515625, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 9.279883381924199, "grad_norm": 0.13017813861370087, "learning_rate": 1e-06, "loss": -0.0704, "num_tokens": 587403874.0, "reward": 0.6729910969734192, "reward_std": 0.1233583390712738, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 993 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3528.0, "completions/mean_length": 1033.0770263671875, "completions/mean_terminated_length": 643.950927734375, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 9.289212827988338, "grad_norm": 0.1934451460838318, "learning_rate": 1e-06, "loss": -0.0575, "num_tokens": 587998991.0, "reward": 0.707589328289032, "reward_std": 0.1688666045665741, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3972.0, "completions/mean_length": 945.72998046875, "completions/mean_terminated_length": 606.9493408203125, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 9.298542274052478, "grad_norm": 0.14389818906784058, "learning_rate": 1e-06, "loss": -0.0465, "num_tokens": 588582557.0, "reward": 0.6819196939468384, "reward_std": 0.13639964163303375, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 995 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4025.0, "completions/mean_length": 906.7779541015625, "completions/mean_terminated_length": 611.1914672851562, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 9.307871720116617, "grad_norm": 0.13750776648521423, "learning_rate": 1e-06, "loss": -0.0147, "num_tokens": 589176174.0, "reward": 0.6886160969734192, "reward_std": 0.11355367302894592, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331799030303955, "step": 996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3042.0, "completions/mean_length": 905.4319458007812, "completions/mean_terminated_length": 613.9671630859375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 9.317201166180759, "grad_norm": 0.16331742703914642, "learning_rate": 1e-06, "loss": -0.0443, "num_tokens": 589767569.0, "reward": 0.6852678656578064, "reward_std": 0.18325723707675934, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 997 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3535.0, "completions/mean_length": 891.9029541015625, "completions/mean_terminated_length": 607.6998901367188, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 9.326530612244898, "grad_norm": 0.1405782550573349, "learning_rate": 1e-06, "loss": -0.0219, "num_tokens": 590345418.0, "reward": 0.6863839626312256, "reward_std": 0.14417481422424316, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422141790390015, "step": 998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2858.0, "completions/mean_length": 855.7332763671875, "completions/mean_terminated_length": 533.6944580078125, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 9.335860058309038, "grad_norm": 0.14971332252025604, "learning_rate": 1e-06, "loss": -0.044, "num_tokens": 590871755.0, "reward": 0.7042410969734192, "reward_std": 0.14771313965320587, "rewards/verify_math_reward/mean": 0.7042410969734192, "rewards/verify_math_reward/std": 0.45663803815841675, "step": 999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3361.0, "completions/mean_length": 931.8750610351562, "completions/mean_terminated_length": 617.4036865234375, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 9.345189504373177, "grad_norm": 0.1444764882326126, "learning_rate": 1e-06, "loss": -0.0138, "num_tokens": 591469659.0, "reward": 0.6830357313156128, "reward_std": 0.11971446871757507, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1000 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1529017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3928.0, "completions/mean_length": 1138.9129638671875, "completions/mean_terminated_length": 605.1567993164062, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 9.354518950437317, "grad_norm": 0.16810676455497742, "learning_rate": 1e-06, "loss": -0.0831, "num_tokens": 592022821.0, "reward": 0.6741071939468384, "reward_std": 0.1713821291923523, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.46896928548812866, "step": 1001 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4012.0, "completions/mean_length": 1000.6585083007812, "completions/mean_terminated_length": 571.95166015625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 9.363848396501458, "grad_norm": 0.1481969803571701, "learning_rate": 1e-06, "loss": -0.0379, "num_tokens": 592555651.0, "reward": 0.6674107313156128, "reward_std": 0.1141170859336853, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 1002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2707.0, "completions/mean_length": 1031.969970703125, "completions/mean_terminated_length": 585.29541015625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 9.373177842565598, "grad_norm": 0.15297362208366394, "learning_rate": 1e-06, "loss": -0.0702, "num_tokens": 593101040.0, "reward": 0.6428571939468384, "reward_std": 0.14207187294960022, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 1003 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2803.0, "completions/mean_length": 1000.8984985351562, "completions/mean_terminated_length": 625.147705078125, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 9.382507288629737, "grad_norm": 0.14825621247291565, "learning_rate": 1e-06, "loss": -0.0752, "num_tokens": 593691989.0, "reward": 0.6573660969734192, "reward_std": 0.1544409692287445, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 1004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1473214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3943.0, "completions/mean_length": 1137.28466796875, "completions/mean_terminated_length": 626.0929565429688, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 9.391836734693877, "grad_norm": 0.13579760491847992, "learning_rate": 1e-06, "loss": -0.0352, "num_tokens": 594255356.0, "reward": 0.6640625, "reward_std": 0.10043821483850479, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1005 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3277.0, "completions/mean_length": 1002.0279541015625, "completions/mean_terminated_length": 639.3927612304688, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 9.401166180758018, "grad_norm": 0.14921943843364716, "learning_rate": 1e-06, "loss": -0.0322, "num_tokens": 594856757.0, "reward": 0.6540178656578064, "reward_std": 0.13083365559577942, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1006 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1484375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3523.0, "completions/mean_length": 1081.48779296875, "completions/mean_terminated_length": 556.0222778320312, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 9.410495626822158, "grad_norm": 0.16988012194633484, "learning_rate": 1e-06, "loss": -0.0603, "num_tokens": 595369450.0, "reward": 0.6718750596046448, "reward_std": 0.1468455195426941, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1007 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3736.0, "completions/mean_length": 1034.6898193359375, "completions/mean_terminated_length": 632.6995239257812, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 9.419825072886297, "grad_norm": 0.15362633764743805, "learning_rate": 1e-06, "loss": -0.0353, "num_tokens": 595959132.0, "reward": 0.629464328289032, "reward_std": 0.13554087281227112, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 1008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3153.0, "completions/mean_length": 1046.751220703125, "completions/mean_terminated_length": 646.3447265625, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 9.429154518950437, "grad_norm": 0.13277749717235565, "learning_rate": 1e-06, "loss": -0.0552, "num_tokens": 596558869.0, "reward": 0.6718750596046448, "reward_std": 0.12993352115154266, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2678.0, "completions/mean_length": 966.3984985351562, "completions/mean_terminated_length": 582.0614013671875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 9.438483965014576, "grad_norm": 0.17353737354278564, "learning_rate": 1e-06, "loss": -0.0524, "num_tokens": 597120634.0, "reward": 0.6495535969734192, "reward_std": 0.1468009203672409, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 1010 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3267.0, "completions/mean_length": 965.2433471679688, "completions/mean_terminated_length": 593.9300537109375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 9.447813411078718, "grad_norm": 0.1273731142282486, "learning_rate": 1e-06, "loss": -0.0399, "num_tokens": 597686860.0, "reward": 0.7299107313156128, "reward_std": 0.1226850375533104, "rewards/verify_math_reward/mean": 0.7299107313156128, "rewards/verify_math_reward/std": 0.44425368309020996, "step": 1011 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3336.0, "completions/mean_length": 973.4989013671875, "completions/mean_terminated_length": 554.5303955078125, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 9.457142857142857, "grad_norm": 0.14967063069343567, "learning_rate": 1e-06, "loss": -0.0464, "num_tokens": 598214675.0, "reward": 0.7008928656578064, "reward_std": 0.12099436670541763, "rewards/verify_math_reward/mean": 0.7008928656578064, "rewards/verify_math_reward/std": 0.4581226110458374, "step": 1012 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2466.0, "completions/mean_length": 975.9777221679688, "completions/mean_terminated_length": 614.630126953125, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 9.466472303206997, "grad_norm": 0.15361587703227997, "learning_rate": 1e-06, "loss": -0.0746, "num_tokens": 598815063.0, "reward": 0.6428571939468384, "reward_std": 0.17040501534938812, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.47942501306533813, "step": 1013 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3505.0, "completions/mean_length": 1141.0648193359375, "completions/mean_terminated_length": 688.507080078125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 9.475801749271136, "grad_norm": 0.1506103128194809, "learning_rate": 1e-06, "loss": -0.0689, "num_tokens": 599433513.0, "reward": 0.6428571939468384, "reward_std": 0.16401740908622742, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.47942501306533813, "step": 1014 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3918.0, "completions/mean_length": 1147.2445068359375, "completions/mean_terminated_length": 713.0486450195312, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 9.485131195335278, "grad_norm": 0.16414706408977509, "learning_rate": 1e-06, "loss": -0.0593, "num_tokens": 600080492.0, "reward": 0.5736607313156128, "reward_std": 0.19309872388839722, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 1015 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3410.0, "completions/mean_length": 872.0859985351562, "completions/mean_terminated_length": 564.6712036132812, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 9.494460641399417, "grad_norm": 0.14051392674446106, "learning_rate": 1e-06, "loss": -0.0586, "num_tokens": 600622465.0, "reward": 0.6986607313156128, "reward_std": 0.1277196854352951, "rewards/verify_math_reward/mean": 0.6986607313156128, "rewards/verify_math_reward/std": 0.4590960443019867, "step": 1016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2636.0, "completions/mean_length": 1015.87060546875, "completions/mean_terminated_length": 584.8091430664062, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 9.503790087463557, "grad_norm": 0.16711576282978058, "learning_rate": 1e-06, "loss": -0.0544, "num_tokens": 601164437.0, "reward": 0.7098214626312256, "reward_std": 0.1406829059123993, "rewards/verify_math_reward/mean": 0.7098214030265808, "rewards/verify_math_reward/std": 0.454098105430603, "step": 1017 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3433.0, "completions/mean_length": 979.5156860351562, "completions/mean_terminated_length": 601.1689453125, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 9.513119533527696, "grad_norm": 0.16849058866500854, "learning_rate": 1e-06, "loss": -0.0812, "num_tokens": 601738163.0, "reward": 0.7020089626312256, "reward_std": 0.19320710003376007, "rewards/verify_math_reward/mean": 0.7020089030265808, "rewards/verify_math_reward/std": 0.45763099193573, "step": 1018 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3570.0, "completions/mean_length": 936.489990234375, "completions/mean_terminated_length": 626.7340698242188, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 9.522448979591836, "grad_norm": 0.1501246690750122, "learning_rate": 1e-06, "loss": -0.0814, "num_tokens": 602330778.0, "reward": 0.7488839626312256, "reward_std": 0.1563873291015625, "rewards/verify_math_reward/mean": 0.7488839030265808, "rewards/verify_math_reward/std": 0.43389734625816345, "step": 1019 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3444.0, "completions/mean_length": 1142.0859375, "completions/mean_terminated_length": 618.0670166015625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 9.531778425655977, "grad_norm": 0.1566106081008911, "learning_rate": 1e-06, "loss": -0.0677, "num_tokens": 602885975.0, "reward": 0.6227678656578064, "reward_std": 0.14733155071735382, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644601345062, "step": 1020 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3076.0, "completions/mean_length": 1067.13623046875, "completions/mean_terminated_length": 566.920654296875, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 9.541107871720117, "grad_norm": 0.17518474161624908, "learning_rate": 1e-06, "loss": -0.071, "num_tokens": 603409729.0, "reward": 0.6540178656578064, "reward_std": 0.15800705552101135, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1021 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3153.0, "completions/mean_length": 1032.0357666015625, "completions/mean_terminated_length": 620.9215087890625, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 9.550437317784256, "grad_norm": 0.14200487732887268, "learning_rate": 1e-06, "loss": -0.0492, "num_tokens": 603988409.0, "reward": 0.6886160969734192, "reward_std": 0.12118053436279297, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 1022 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1517857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3979.0, "completions/mean_length": 1218.75341796875, "completions/mean_terminated_length": 703.877685546875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 9.559766763848396, "grad_norm": 0.14762155711650848, "learning_rate": 1e-06, "loss": -0.0586, "num_tokens": 604624500.0, "reward": 0.574776828289032, "reward_std": 0.13729864358901978, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 1023 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3154.0, "completions/mean_length": 898.07373046875, "completions/mean_terminated_length": 614.4180297851562, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 9.569096209912537, "grad_norm": 0.1496865153312683, "learning_rate": 1e-06, "loss": -0.03, "num_tokens": 605229534.0, "reward": 0.6941964626312256, "reward_std": 0.14770880341529846, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.4610042870044708, "step": 1024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3939.0, "completions/mean_length": 1017.4888916015625, "completions/mean_terminated_length": 630.7412109375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 9.578425655976677, "grad_norm": 0.1566886007785797, "learning_rate": 1e-06, "loss": -0.0589, "num_tokens": 605814708.0, "reward": 0.691964328289032, "reward_std": 0.15567448735237122, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 1025 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4006.0, "completions/mean_length": 987.8616333007812, "completions/mean_terminated_length": 539.3052368164062, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 9.587755102040816, "grad_norm": 0.13987243175506592, "learning_rate": 1e-06, "loss": -0.0501, "num_tokens": 606334264.0, "reward": 0.6830357313156128, "reward_std": 0.094690702855587, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1026 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3537.0, "completions/mean_length": 943.87841796875, "completions/mean_terminated_length": 596.24658203125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 9.597084548104956, "grad_norm": 0.1412520706653595, "learning_rate": 1e-06, "loss": -0.0361, "num_tokens": 606901419.0, "reward": 0.7142857313156128, "reward_std": 0.10074201226234436, "rewards/verify_math_reward/mean": 0.7142857313156128, "rewards/verify_math_reward/std": 0.4520062506198883, "step": 1027 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3698.0, "completions/mean_length": 1085.5546875, "completions/mean_terminated_length": 655.4910278320312, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 9.606413994169095, "grad_norm": 0.15377016365528107, "learning_rate": 1e-06, "loss": -0.0562, "num_tokens": 607509756.0, "reward": 0.6194196939468384, "reward_std": 0.14744214713573456, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1028 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3705.0, "completions/mean_length": 944.6574096679688, "completions/mean_terminated_length": 553.2107543945312, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 9.615743440233237, "grad_norm": 0.15246158838272095, "learning_rate": 1e-06, "loss": -0.0621, "num_tokens": 608045433.0, "reward": 0.6674107313156128, "reward_std": 0.1385371834039688, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 1029 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2611.0, "completions/mean_length": 840.896240234375, "completions/mean_terminated_length": 547.857666015625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 9.625072886297376, "grad_norm": 0.1378517597913742, "learning_rate": 1e-06, "loss": -0.0574, "num_tokens": 608581124.0, "reward": 0.7198660969734192, "reward_std": 0.12297996133565903, "rewards/verify_math_reward/mean": 0.7198660969734192, "rewards/verify_math_reward/std": 0.44931527972221375, "step": 1030 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3184.0, "completions/mean_length": 1007.8839721679688, "completions/mean_terminated_length": 619.9296264648438, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 9.634402332361516, "grad_norm": 0.17682576179504395, "learning_rate": 1e-06, "loss": -0.0557, "num_tokens": 609164252.0, "reward": 0.6584821939468384, "reward_std": 0.1277197003364563, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 1031 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3310.0, "completions/mean_length": 1014.5391235351562, "completions/mean_terminated_length": 623.057861328125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 9.643731778425655, "grad_norm": 0.15746831893920898, "learning_rate": 1e-06, "loss": -0.0636, "num_tokens": 609753823.0, "reward": 0.6674107313156128, "reward_std": 0.19419346749782562, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 1032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2535.0, "completions/mean_length": 965.8917846679688, "completions/mean_terminated_length": 581.4924926757812, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 9.653061224489797, "grad_norm": 0.15120132267475128, "learning_rate": 1e-06, "loss": -0.0273, "num_tokens": 610301798.0, "reward": 0.7120535969734192, "reward_std": 0.1345216929912567, "rewards/verify_math_reward/mean": 0.7120535969734192, "rewards/verify_math_reward/std": 0.4530589282512665, "step": 1033 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3883.0, "completions/mean_length": 1081.328125, "completions/mean_terminated_length": 597.106201171875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 9.662390670553936, "grad_norm": 0.1371956616640091, "learning_rate": 1e-06, "loss": -0.0355, "num_tokens": 610852388.0, "reward": 0.6640625, "reward_std": 0.12046445161104202, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1034 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3800.0, "completions/mean_length": 890.560302734375, "completions/mean_terminated_length": 580.6095581054688, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 9.671720116618076, "grad_norm": 0.15502233803272247, "learning_rate": 1e-06, "loss": -0.0734, "num_tokens": 611411858.0, "reward": 0.6863839626312256, "reward_std": 0.16375456750392914, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422144770622253, "step": 1035 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3866.0, "completions/mean_length": 1024.638427734375, "completions/mean_terminated_length": 612.5316772460938, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 9.681049562682215, "grad_norm": 0.1512136310338974, "learning_rate": 1e-06, "loss": -0.075, "num_tokens": 611984750.0, "reward": 0.6808035969734192, "reward_std": 0.15939712524414062, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.46642565727233887, "step": 1036 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3534.0, "completions/mean_length": 1063.9241943359375, "completions/mean_terminated_length": 595.04638671875, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 9.690379008746355, "grad_norm": 0.15705522894859314, "learning_rate": 1e-06, "loss": -0.0607, "num_tokens": 612525778.0, "reward": 0.6774553656578064, "reward_std": 0.13617070019245148, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 1037 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2797.0, "completions/mean_length": 1029.1842041015625, "completions/mean_terminated_length": 630.8461303710938, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 9.699708454810496, "grad_norm": 0.15667232871055603, "learning_rate": 1e-06, "loss": -0.0679, "num_tokens": 613109703.0, "reward": 0.6194196939468384, "reward_std": 0.17499276995658875, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3893.0, "completions/mean_length": 941.700927734375, "completions/mean_terminated_length": 593.8289794921875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 9.709037900874636, "grad_norm": 0.16166822612285614, "learning_rate": 1e-06, "loss": -0.0346, "num_tokens": 613678235.0, "reward": 0.65625, "reward_std": 0.14260390400886536, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 1039 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2820.0, "completions/mean_length": 1102.009033203125, "completions/mean_terminated_length": 607.5526733398438, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 9.718367346938775, "grad_norm": 0.15334701538085938, "learning_rate": 1e-06, "loss": -0.072, "num_tokens": 614230603.0, "reward": 0.6439732313156128, "reward_std": 0.14000847935676575, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 1040 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3770.0, "completions/mean_length": 880.9285888671875, "completions/mean_terminated_length": 557.0515747070312, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 9.727696793002915, "grad_norm": 0.15773150324821472, "learning_rate": 1e-06, "loss": -0.0599, "num_tokens": 614774603.0, "reward": 0.7042410969734192, "reward_std": 0.15353691577911377, "rewards/verify_math_reward/mean": 0.7042410969734192, "rewards/verify_math_reward/std": 0.45663803815841675, "step": 1041 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3833.0, "completions/mean_length": 860.6138916015625, "completions/mean_terminated_length": 586.4285888671875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 9.737026239067056, "grad_norm": 0.15409444272518158, "learning_rate": 1e-06, "loss": -0.036, "num_tokens": 615341377.0, "reward": 0.7421875596046448, "reward_std": 0.14481674134731293, "rewards/verify_math_reward/mean": 0.7421875, "rewards/verify_math_reward/std": 0.43767455220222473, "step": 1042 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2648.0, "completions/mean_length": 980.2656860351562, "completions/mean_terminated_length": 575.5737915039062, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 9.746355685131196, "grad_norm": 0.15941530466079712, "learning_rate": 1e-06, "loss": -0.0827, "num_tokens": 615884735.0, "reward": 0.6584821939468384, "reward_std": 0.14414453506469727, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 1043 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2472.0, "completions/mean_length": 1015.44091796875, "completions/mean_terminated_length": 593.2322387695312, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 9.755685131195335, "grad_norm": 0.17075827717781067, "learning_rate": 1e-06, "loss": -0.0778, "num_tokens": 616437930.0, "reward": 0.691964328289032, "reward_std": 0.14692078530788422, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 1044 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3558.0, "completions/mean_length": 929.5592041015625, "completions/mean_terminated_length": 571.6136474609375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 9.765014577259475, "grad_norm": 0.12774479389190674, "learning_rate": 1e-06, "loss": -0.0586, "num_tokens": 616973047.0, "reward": 0.7198660969734192, "reward_std": 0.12253489345312119, "rewards/verify_math_reward/mean": 0.7198660969734192, "rewards/verify_math_reward/std": 0.44931527972221375, "step": 1045 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2893.0, "completions/mean_length": 997.10498046875, "completions/mean_terminated_length": 693.2916870117188, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 9.774344023323614, "grad_norm": 0.142217755317688, "learning_rate": 1e-06, "loss": -0.0456, "num_tokens": 617633069.0, "reward": 0.668526828289032, "reward_std": 0.16206417977809906, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 1046 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1607142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3108.0, "completions/mean_length": 1195.575927734375, "completions/mean_terminated_length": 640.175537109375, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 9.783673469387756, "grad_norm": 0.14058978855609894, "learning_rate": 1e-06, "loss": -0.0683, "num_tokens": 618211889.0, "reward": 0.606026828289032, "reward_std": 0.12215623259544373, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 1047 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3573.0, "completions/mean_length": 1019.1953735351562, "completions/mean_terminated_length": 610.7699584960938, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 9.793002915451895, "grad_norm": 0.15759840607643127, "learning_rate": 1e-06, "loss": -0.0574, "num_tokens": 618788432.0, "reward": 0.6729910969734192, "reward_std": 0.13534724712371826, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 1048 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3467.0, "completions/mean_length": 942.2656860351562, "completions/mean_terminated_length": 603.1124877929688, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 9.802332361516035, "grad_norm": 0.14560723304748535, "learning_rate": 1e-06, "loss": -0.0244, "num_tokens": 619364190.0, "reward": 0.6339285969734192, "reward_std": 0.13711388409137726, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 1049 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3019.0, "completions/mean_length": 901.3504638671875, "completions/mean_terminated_length": 596.7261962890625, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 9.811661807580174, "grad_norm": 0.14852198958396912, "learning_rate": 1e-06, "loss": -0.0725, "num_tokens": 619940152.0, "reward": 0.7299107313156128, "reward_std": 0.157290980219841, "rewards/verify_math_reward/mean": 0.7299107313156128, "rewards/verify_math_reward/std": 0.44425368309020996, "step": 1050 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1484375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2936.0, "completions/mean_length": 1114.2545166015625, "completions/mean_terminated_length": 594.5006713867188, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 9.820991253644316, "grad_norm": 0.1678614467382431, "learning_rate": 1e-06, "loss": -0.0968, "num_tokens": 620474324.0, "reward": 0.6662946939468384, "reward_std": 0.16078147292137146, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 1051 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1729910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3515.0, "completions/mean_length": 1249.8616943359375, "completions/mean_terminated_length": 654.5155029296875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 9.830320699708455, "grad_norm": 0.1502968668937683, "learning_rate": 1e-06, "loss": -0.0108, "num_tokens": 621053568.0, "reward": 0.6171875, "reward_std": 0.12944427132606506, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 1052 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3461.0, "completions/mean_length": 1086.83154296875, "completions/mean_terminated_length": 594.4220581054688, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 9.839650145772595, "grad_norm": 0.15375304222106934, "learning_rate": 1e-06, "loss": -0.0698, "num_tokens": 621594113.0, "reward": 0.6718750596046448, "reward_std": 0.12621337175369263, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1053 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2540.0, "completions/mean_length": 1051.484375, "completions/mean_terminated_length": 620.9860229492188, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 9.848979591836734, "grad_norm": 0.13623444736003876, "learning_rate": 1e-06, "loss": -0.042, "num_tokens": 622171643.0, "reward": 0.606026828289032, "reward_std": 0.14166070520877838, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 1054 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2847.0, "completions/mean_length": 1043.6707763671875, "completions/mean_terminated_length": 585.233642578125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 9.858309037900874, "grad_norm": 0.14690545201301575, "learning_rate": 1e-06, "loss": -0.0409, "num_tokens": 622729692.0, "reward": 0.637276828289032, "reward_std": 0.12471521645784378, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 1055 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3755.0, "completions/mean_length": 1062.4554443359375, "completions/mean_terminated_length": 655.4227905273438, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 9.867638483965015, "grad_norm": 0.13697752356529236, "learning_rate": 1e-06, "loss": -0.0495, "num_tokens": 623341684.0, "reward": 0.6339285969734192, "reward_std": 0.1335780918598175, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 1056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3519.0, "completions/mean_length": 1059.7489013671875, "completions/mean_terminated_length": 603.7265625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 9.876967930029155, "grad_norm": 0.14473684132099152, "learning_rate": 1e-06, "loss": -0.0599, "num_tokens": 623899091.0, "reward": 0.6540178656578064, "reward_std": 0.14180973172187805, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1057 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3030.0, "completions/mean_length": 960.9207763671875, "completions/mean_terminated_length": 589.0948486328125, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 9.886297376093294, "grad_norm": 0.17573131620883942, "learning_rate": 1e-06, "loss": -0.0561, "num_tokens": 624464804.0, "reward": 0.6808035969734192, "reward_std": 0.1548050194978714, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.4664256274700165, "step": 1058 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2810.0, "completions/mean_length": 1017.9732666015625, "completions/mean_terminated_length": 631.2864379882812, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 9.895626822157434, "grad_norm": 0.14279092848300934, "learning_rate": 1e-06, "loss": -0.0498, "num_tokens": 625053820.0, "reward": 0.6160714626312256, "reward_std": 0.12166837602853775, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 1059 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2843.0, "completions/mean_length": 976.1038208007812, "completions/mean_terminated_length": 548.5037841796875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 9.904956268221575, "grad_norm": 0.1449451446533203, "learning_rate": 1e-06, "loss": -0.0513, "num_tokens": 625572225.0, "reward": 0.6540178656578064, "reward_std": 0.12230778485536575, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1060 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 809.3482666015625, "completions/mean_terminated_length": 522.1650390625, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 9.914285714285715, "grad_norm": 0.15559326112270355, "learning_rate": 1e-06, "loss": -0.0357, "num_tokens": 626083113.0, "reward": 0.7444196939468384, "reward_std": 0.13147373497486115, "rewards/verify_math_reward/mean": 0.7444196343421936, "rewards/verify_math_reward/std": 0.43643057346343994, "step": 1061 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3141.0, "completions/mean_length": 915.5792846679688, "completions/mean_terminated_length": 577.9049682617188, "completions/min_length": 193.0, "completions/min_terminated_length": 193.0, "epoch": 9.923615160349854, "grad_norm": 0.1454796940088272, "learning_rate": 1e-06, "loss": -0.0399, "num_tokens": 626640072.0, "reward": 0.7020089626312256, "reward_std": 0.135757714509964, "rewards/verify_math_reward/mean": 0.7020089030265808, "rewards/verify_math_reward/std": 0.45763099193573, "step": 1062 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3874.0, "completions/mean_length": 903.3516235351562, "completions/mean_terminated_length": 573.0775756835938, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 9.932944606413994, "grad_norm": 0.16085104644298553, "learning_rate": 1e-06, "loss": -0.0282, "num_tokens": 627201011.0, "reward": 0.6930803656578064, "reward_std": 0.13602760434150696, "rewards/verify_math_reward/mean": 0.6930803656578064, "rewards/verify_math_reward/std": 0.46147337555885315, "step": 1063 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4053.0, "completions/mean_length": 1088.03466796875, "completions/mean_terminated_length": 675.775390625, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 9.942274052478133, "grad_norm": 0.17610017955303192, "learning_rate": 1e-06, "loss": -0.076, "num_tokens": 627813362.0, "reward": 0.6752232313156128, "reward_std": 0.15161874890327454, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 1064 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3562.0, "completions/mean_length": 1067.4888916015625, "completions/mean_terminated_length": 608.1516723632812, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 9.951603498542275, "grad_norm": 0.1570524126291275, "learning_rate": 1e-06, "loss": -0.0795, "num_tokens": 628378408.0, "reward": 0.6953125596046448, "reward_std": 0.15161871910095215, "rewards/verify_math_reward/mean": 0.6953125, "rewards/verify_math_reward/std": 0.4605320394039154, "step": 1065 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3239.0, "completions/mean_length": 933.4364013671875, "completions/mean_terminated_length": 593.333740234375, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 9.960932944606414, "grad_norm": 0.15946438908576965, "learning_rate": 1e-06, "loss": -0.056, "num_tokens": 628949591.0, "reward": 0.65625, "reward_std": 0.1517256796360016, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 1066 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3924.0, "completions/mean_length": 1110.5045166015625, "completions/mean_terminated_length": 612.921875, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 9.970262390670554, "grad_norm": 0.17599667608737946, "learning_rate": 1e-06, "loss": -0.0912, "num_tokens": 629513099.0, "reward": 0.6506696939468384, "reward_std": 0.17979852855205536, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 1067 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4059.0, "completions/mean_length": 910.0491333007812, "completions/mean_terminated_length": 589.1056518554688, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 9.979591836734693, "grad_norm": 0.13086532056331635, "learning_rate": 1e-06, "loss": -0.0395, "num_tokens": 630071575.0, "reward": 0.7377232313156128, "reward_std": 0.1244862899184227, "rewards/verify_math_reward/mean": 0.7377232313156128, "rewards/verify_math_reward/std": 0.4401180148124695, "step": 1068 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1517857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3421.0, "completions/mean_length": 1142.2132568359375, "completions/mean_terminated_length": 613.6408081054688, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 9.988921282798835, "grad_norm": 0.14581549167633057, "learning_rate": 1e-06, "loss": -0.0634, "num_tokens": 630642030.0, "reward": 0.6194196939468384, "reward_std": 0.13955312967300415, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1069 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1335227272727273, "completions/max_length": 4096.0, "completions/max_terminated_length": 3067.0, "completions/mean_length": 1102.3096923828125, "completions/mean_terminated_length": 640.9868774414062, "completions/min_length": 210.0, "completions/min_terminated_length": 210.0, "epoch": 9.998250728862974, "grad_norm": 0.14599387347698212, "learning_rate": 1e-06, "loss": -0.0734, "num_tokens": 631205603.0, "reward": 0.6953125596046448, "reward_std": 0.15421095490455627, "rewards/verify_math_reward/mean": 0.6953125, "rewards/verify_math_reward/std": 0.4605320394039154, "step": 1070 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3081.0, "completions/mean_length": 1036.8929443359375, "completions/mean_terminated_length": 586.4481811523438, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 10.00932944606414, "grad_norm": 0.1592647284269333, "learning_rate": 1e-06, "loss": -0.0385, "num_tokens": 631756123.0, "reward": 0.6238839626312256, "reward_std": 0.14958925545215607, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.4846802353858948, "step": 1071 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3764.0, "completions/mean_length": 1067.7421875, "completions/mean_terminated_length": 617.3859252929688, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 10.018658892128279, "grad_norm": 0.15054309368133545, "learning_rate": 1e-06, "loss": -0.0666, "num_tokens": 632331092.0, "reward": 0.625, "reward_std": 0.13447962701320648, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3609.0, "completions/mean_length": 1039.235595703125, "completions/mean_terminated_length": 598.0932006835938, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 10.02798833819242, "grad_norm": 0.15350356698036194, "learning_rate": 1e-06, "loss": -0.0826, "num_tokens": 632890327.0, "reward": 0.6819196939468384, "reward_std": 0.1584860384464264, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 1073 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3400.0, "completions/mean_length": 974.411865234375, "completions/mean_terminated_length": 591.0588989257812, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 10.03731778425656, "grad_norm": 0.15882743895053864, "learning_rate": 1e-06, "loss": -0.0523, "num_tokens": 633447944.0, "reward": 0.7008928656578064, "reward_std": 0.14094392955303192, "rewards/verify_math_reward/mean": 0.7008928656578064, "rewards/verify_math_reward/std": 0.458122581243515, "step": 1074 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4084.0, "completions/mean_length": 1041.391845703125, "completions/mean_terminated_length": 648.9861450195312, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 10.0466472303207, "grad_norm": 0.1584683358669281, "learning_rate": 1e-06, "loss": -0.0762, "num_tokens": 634058671.0, "reward": 0.6640625, "reward_std": 0.15826597809791565, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1075 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3758.0, "completions/mean_length": 876.3125610351562, "completions/mean_terminated_length": 564.9840698242188, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 10.055976676384839, "grad_norm": 0.15721167623996735, "learning_rate": 1e-06, "loss": -0.0481, "num_tokens": 634596839.0, "reward": 0.7645089626312256, "reward_std": 0.13655047118663788, "rewards/verify_math_reward/mean": 0.7645089030265808, "rewards/verify_math_reward/std": 0.42454230785369873, "step": 1076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 1095.872802734375, "completions/mean_terminated_length": 645.2760009765625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 10.06530612244898, "grad_norm": 0.13884396851062775, "learning_rate": 1e-06, "loss": -0.0713, "num_tokens": 635191853.0, "reward": 0.6752232313156128, "reward_std": 0.1356835663318634, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 1077 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3146.0, "completions/mean_length": 1035.4554443359375, "completions/mean_terminated_length": 646.6314086914062, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 10.07463556851312, "grad_norm": 0.17396743595600128, "learning_rate": 1e-06, "loss": -0.0641, "num_tokens": 635793405.0, "reward": 0.6629464626312256, "reward_std": 0.17085261642932892, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 1078 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3872.0, "completions/mean_length": 1029.828125, "completions/mean_terminated_length": 609.5913696289062, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 10.08396501457726, "grad_norm": 0.15869788825511932, "learning_rate": 1e-06, "loss": -0.0717, "num_tokens": 636376547.0, "reward": 0.652901828289032, "reward_std": 0.14278724789619446, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631317377090454, "step": 1079 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 1071.9442138671875, "completions/mean_terminated_length": 622.2128295898438, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 10.093294460641399, "grad_norm": 0.13492093980312347, "learning_rate": 1e-06, "loss": -0.0686, "num_tokens": 636963745.0, "reward": 0.6707589626312256, "reward_std": 0.13230746984481812, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 1080 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2528.0, "completions/mean_length": 995.6964721679688, "completions/mean_terminated_length": 575.2496948242188, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 10.102623906705539, "grad_norm": 0.15736635029315948, "learning_rate": 1e-06, "loss": -0.061, "num_tokens": 637510281.0, "reward": 0.6573660969734192, "reward_std": 0.1522146463394165, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 1081 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2751.0, "completions/mean_length": 938.6629638671875, "completions/mean_terminated_length": 577.3756103515625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 10.11195335276968, "grad_norm": 0.1429387331008911, "learning_rate": 1e-06, "loss": -0.0675, "num_tokens": 638069763.0, "reward": 0.65625, "reward_std": 0.13632366061210632, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 1082 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3363.0, "completions/mean_length": 952.0335083007812, "completions/mean_terminated_length": 613.9307861328125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 10.12128279883382, "grad_norm": 0.1521647572517395, "learning_rate": 1e-06, "loss": -0.0679, "num_tokens": 638666449.0, "reward": 0.6741071939468384, "reward_std": 0.12651757895946503, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692258834839, "step": 1083 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3644.0, "completions/mean_length": 1054.157470703125, "completions/mean_terminated_length": 624.0369262695312, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 10.130612244897959, "grad_norm": 0.14456379413604736, "learning_rate": 1e-06, "loss": -0.0326, "num_tokens": 639250758.0, "reward": 0.6104910969734192, "reward_std": 0.1328292191028595, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791125416755676, "step": 1084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3379.0, "completions/mean_length": 945.0011596679688, "completions/mean_terminated_length": 640.3145751953125, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 10.139941690962099, "grad_norm": 0.1351221203804016, "learning_rate": 1e-06, "loss": -0.0504, "num_tokens": 639857543.0, "reward": 0.6886160969734192, "reward_std": 0.13632294535636902, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 1085 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3385.0, "completions/mean_length": 887.974365234375, "completions/mean_terminated_length": 520.8867797851562, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 10.14927113702624, "grad_norm": 0.15145932137966156, "learning_rate": 1e-06, "loss": -0.0333, "num_tokens": 640359888.0, "reward": 0.7533482313156128, "reward_std": 0.11881474405527115, "rewards/verify_math_reward/mean": 0.7533482313156128, "rewards/verify_math_reward/std": 0.4313030242919922, "step": 1086 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3285.0, "completions/mean_length": 1033.5670166015625, "completions/mean_terminated_length": 569.0848388671875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 10.15860058309038, "grad_norm": 0.15234391391277313, "learning_rate": 1e-06, "loss": -0.0441, "num_tokens": 640897724.0, "reward": 0.6875000596046448, "reward_std": 0.11840105801820755, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 1087 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3239.0, "completions/mean_length": 1144.555908203125, "completions/mean_terminated_length": 620.9750366210938, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 10.167930029154519, "grad_norm": 0.18200331926345825, "learning_rate": 1e-06, "loss": -0.0862, "num_tokens": 641455310.0, "reward": 0.6796875596046448, "reward_std": 0.1666409969329834, "rewards/verify_math_reward/mean": 0.6796875, "rewards/verify_math_reward/std": 0.4668572247028351, "step": 1088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3760.0, "completions/mean_length": 1143.0279541015625, "completions/mean_terminated_length": 655.3458862304688, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 10.177259475218658, "grad_norm": 0.15295979380607605, "learning_rate": 1e-06, "loss": -0.0284, "num_tokens": 642050831.0, "reward": 0.6350446939468384, "reward_std": 0.1215910017490387, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 1089 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2933.0, "completions/mean_length": 922.83935546875, "completions/mean_terminated_length": 546.4968872070312, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 10.186588921282798, "grad_norm": 0.1875612586736679, "learning_rate": 1e-06, "loss": -0.0659, "num_tokens": 642573999.0, "reward": 0.7321428656578064, "reward_std": 0.14053206145763397, "rewards/verify_math_reward/mean": 0.7321428656578064, "rewards/verify_math_reward/std": 0.4430900514125824, "step": 1090 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4059.0, "completions/mean_length": 864.3248291015625, "completions/mean_terminated_length": 543.139892578125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 10.19591836734694, "grad_norm": 0.17017853260040283, "learning_rate": 1e-06, "loss": -0.0586, "num_tokens": 643100810.0, "reward": 0.7399553656578064, "reward_std": 0.14109547436237335, "rewards/verify_math_reward/mean": 0.7399553656578064, "rewards/verify_math_reward/std": 0.43890368938446045, "step": 1091 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3600.0, "completions/mean_length": 1183.9296875, "completions/mean_terminated_length": 640.0861206054688, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 10.205247813411079, "grad_norm": 0.15581268072128296, "learning_rate": 1e-06, "loss": -0.0602, "num_tokens": 643675971.0, "reward": 0.5814732313156128, "reward_std": 0.16871143877506256, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 1092 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3823.0, "completions/mean_length": 1058.7679443359375, "completions/mean_terminated_length": 616.0, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 10.214577259475218, "grad_norm": 0.15681420266628265, "learning_rate": 1e-06, "loss": -0.0534, "num_tokens": 644240875.0, "reward": 0.6484375, "reward_std": 0.127794548869133, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 1093 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2770.0, "completions/mean_length": 1101.3226318359375, "completions/mean_terminated_length": 624.8085327148438, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 10.223906705539358, "grad_norm": 0.1676253229379654, "learning_rate": 1e-06, "loss": -0.0415, "num_tokens": 644820436.0, "reward": 0.6473214626312256, "reward_std": 0.1525605320930481, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 1094 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2845.0, "completions/mean_length": 987.5324096679688, "completions/mean_terminated_length": 601.4115600585938, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 10.2332361516035, "grad_norm": 0.16065557301044464, "learning_rate": 1e-06, "loss": -0.0608, "num_tokens": 645383737.0, "reward": 0.668526828289032, "reward_std": 0.15416745841503143, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 1095 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1897321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2385.0, "completions/mean_length": 1279.7366943359375, "completions/mean_terminated_length": 620.281005859375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 10.242565597667639, "grad_norm": 0.18482163548469543, "learning_rate": 1e-06, "loss": -0.086, "num_tokens": 645924821.0, "reward": 0.6339285969734192, "reward_std": 0.15454541146755219, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 1096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3035.0, "completions/mean_length": 911.0859985351562, "completions/mean_terminated_length": 590.2468872070312, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 10.251895043731778, "grad_norm": 0.13147014379501343, "learning_rate": 1e-06, "loss": -0.0463, "num_tokens": 646491762.0, "reward": 0.7243303656578064, "reward_std": 0.10957279056310654, "rewards/verify_math_reward/mean": 0.7243303656578064, "rewards/verify_math_reward/std": 0.4471006691455841, "step": 1097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1517857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3002.0, "completions/mean_length": 1121.532470703125, "completions/mean_terminated_length": 589.2592163085938, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 10.261224489795918, "grad_norm": 0.18821877241134644, "learning_rate": 1e-06, "loss": -0.0758, "num_tokens": 647025767.0, "reward": 0.6718750596046448, "reward_std": 0.14563976228237152, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1098 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2475.0, "completions/mean_length": 994.2120971679688, "completions/mean_terminated_length": 555.6152954101562, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 10.270553935860057, "grad_norm": 0.15949542820453644, "learning_rate": 1e-06, "loss": -0.0513, "num_tokens": 647555245.0, "reward": 0.6718750596046448, "reward_std": 0.1250917762517929, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1099 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1395089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3541.0, "completions/mean_length": 1059.86279296875, "completions/mean_terminated_length": 567.62255859375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 10.279883381924199, "grad_norm": 0.15890197455883026, "learning_rate": 1e-06, "loss": -0.068, "num_tokens": 648094338.0, "reward": 0.6104910969734192, "reward_std": 0.14369018375873566, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791128396987915, "step": 1100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3352.0, "completions/mean_length": 900.8761596679688, "completions/mean_terminated_length": 574.6826171875, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 10.289212827988338, "grad_norm": 0.1462251842021942, "learning_rate": 1e-06, "loss": -0.0407, "num_tokens": 648651491.0, "reward": 0.6897321939468384, "reward_std": 0.15082526206970215, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.462861567735672, "step": 1101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 1093.40185546875, "completions/mean_terminated_length": 629.0824584960938, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 10.298542274052478, "grad_norm": 0.17598183453083038, "learning_rate": 1e-06, "loss": -0.1181, "num_tokens": 649232307.0, "reward": 0.6417410969734192, "reward_std": 0.18468095362186432, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975659370422363, "step": 1102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3879.0, "completions/mean_length": 1065.2020263671875, "completions/mean_terminated_length": 614.4679565429688, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 10.307871720116617, "grad_norm": 0.16817888617515564, "learning_rate": 1e-06, "loss": -0.0647, "num_tokens": 649804608.0, "reward": 0.6741071939468384, "reward_std": 0.15774603188037872, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 1103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3969.0, "completions/mean_length": 1103.15625, "completions/mean_terminated_length": 640.3453369140625, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 10.317201166180759, "grad_norm": 0.13881205022335052, "learning_rate": 1e-06, "loss": -0.0595, "num_tokens": 650385860.0, "reward": 0.6383928656578064, "reward_std": 0.13387976586818695, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 1104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2864.0, "completions/mean_length": 1060.6663818359375, "completions/mean_terminated_length": 591.2847900390625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 10.326530612244898, "grad_norm": 0.15930691361427307, "learning_rate": 1e-06, "loss": -0.101, "num_tokens": 650929385.0, "reward": 0.6618303656578064, "reward_std": 0.14210577309131622, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 1105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2614.0, "completions/mean_length": 991.09716796875, "completions/mean_terminated_length": 614.1564331054688, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 10.335860058309038, "grad_norm": 0.16268374025821686, "learning_rate": 1e-06, "loss": -0.0545, "num_tokens": 651504872.0, "reward": 0.6573660969734192, "reward_std": 0.1695399135351181, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485536336898804, "step": 1106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2973.0, "completions/mean_length": 817.107177734375, "completions/mean_terminated_length": 543.5357055664062, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 10.345189504373177, "grad_norm": 0.16051194071769714, "learning_rate": 1e-06, "loss": -0.0424, "num_tokens": 652040960.0, "reward": 0.7087053656578064, "reward_std": 0.13883958756923676, "rewards/verify_math_reward/mean": 0.7087053656578064, "rewards/verify_math_reward/std": 0.45461276173591614, "step": 1107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3574.0, "completions/mean_length": 951.17529296875, "completions/mean_terminated_length": 586.9551391601562, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 10.354518950437317, "grad_norm": 0.1385965794324875, "learning_rate": 1e-06, "loss": -0.0719, "num_tokens": 652596197.0, "reward": 0.7176339626312256, "reward_std": 0.14011837542057037, "rewards/verify_math_reward/mean": 0.7176339030265808, "rewards/verify_math_reward/std": 0.4504019320011139, "step": 1108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 906.2500610351562, "completions/mean_terminated_length": 563.2237548828125, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 10.363848396501458, "grad_norm": 0.15858794748783112, "learning_rate": 1e-06, "loss": -0.0595, "num_tokens": 653139237.0, "reward": 0.7187500596046448, "reward_std": 0.1417366862297058, "rewards/verify_math_reward/mean": 0.71875, "rewards/verify_math_reward/std": 0.4498603343963623, "step": 1109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4081.0, "completions/mean_length": 1086.62841796875, "completions/mean_terminated_length": 594.1857299804688, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 10.373177842565598, "grad_norm": 0.14364366233348846, "learning_rate": 1e-06, "loss": -0.0691, "num_tokens": 653681856.0, "reward": 0.6361607313156128, "reward_std": 0.12790516018867493, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 1110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3847.0, "completions/mean_length": 1050.5703125, "completions/mean_terminated_length": 593.1694946289062, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 10.382507288629737, "grad_norm": 0.14082437753677368, "learning_rate": 1e-06, "loss": -0.0634, "num_tokens": 654217687.0, "reward": 0.6718750596046448, "reward_std": 0.12117871642112732, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3709.0, "completions/mean_length": 992.2913208007812, "completions/mean_terminated_length": 562.4256591796875, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 10.391836734693877, "grad_norm": 0.16474808752536774, "learning_rate": 1e-06, "loss": -0.0607, "num_tokens": 654745836.0, "reward": 0.7299107313156128, "reward_std": 0.1471807062625885, "rewards/verify_math_reward/mean": 0.7299107313156128, "rewards/verify_math_reward/std": 0.44425368309020996, "step": 1112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1752232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 1236.1317138671875, "completions/mean_terminated_length": 628.5547485351562, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 10.401166180758018, "grad_norm": 0.1435873955488205, "learning_rate": 1e-06, "loss": -0.0601, "num_tokens": 655310090.0, "reward": 0.6071428656578064, "reward_std": 0.09596949070692062, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 1113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2884.0, "completions/mean_length": 966.1886596679688, "completions/mean_terminated_length": 577.4165649414062, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 10.410495626822158, "grad_norm": 0.14196231961250305, "learning_rate": 1e-06, "loss": -0.071, "num_tokens": 655864659.0, "reward": 0.7087053656578064, "reward_std": 0.13816556334495544, "rewards/verify_math_reward/mean": 0.7087053656578064, "rewards/verify_math_reward/std": 0.45461276173591614, "step": 1114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2549.0, "completions/mean_length": 1070.321533203125, "completions/mean_terminated_length": 584.3316040039062, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 10.419825072886297, "grad_norm": 0.1511499583721161, "learning_rate": 1e-06, "loss": -0.0889, "num_tokens": 656395059.0, "reward": 0.7042410969734192, "reward_std": 0.1297919601202011, "rewards/verify_math_reward/mean": 0.7042410969734192, "rewards/verify_math_reward/std": 0.45663803815841675, "step": 1115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3452.0, "completions/mean_length": 1130.4007568359375, "completions/mean_terminated_length": 608.8910522460938, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 10.429154518950437, "grad_norm": 0.18190579116344452, "learning_rate": 1e-06, "loss": -0.0725, "num_tokens": 656951290.0, "reward": 0.6227678656578064, "reward_std": 0.1635277271270752, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644899368286, "step": 1116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3742.0, "completions/mean_length": 944.6239013671875, "completions/mean_terminated_length": 592.7332153320312, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 10.438483965014576, "grad_norm": 0.1650150716304779, "learning_rate": 1e-06, "loss": -0.0663, "num_tokens": 657508921.0, "reward": 0.7053571939468384, "reward_std": 0.1522895097732544, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613664388656616, "step": 1117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2980.0, "completions/mean_length": 1069.6160888671875, "completions/mean_terminated_length": 628.4296875, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 10.447813411078718, "grad_norm": 0.16112597286701202, "learning_rate": 1e-06, "loss": -0.0687, "num_tokens": 658091625.0, "reward": 0.6495535969734192, "reward_std": 0.16022199392318726, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 1118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2301.0, "completions/mean_length": 1105.52685546875, "completions/mean_terminated_length": 607.1146240234375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 10.457142857142857, "grad_norm": 0.36504054069519043, "learning_rate": 1e-06, "loss": -0.0644, "num_tokens": 658651041.0, "reward": 0.598214328289032, "reward_std": 0.14740823209285736, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 1119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1584821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3316.0, "completions/mean_length": 1135.7176513671875, "completions/mean_terminated_length": 578.2108764648438, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 10.466472303206997, "grad_norm": 0.13417407870292664, "learning_rate": 1e-06, "loss": -0.092, "num_tokens": 659178388.0, "reward": 0.6819196939468384, "reward_std": 0.1305733323097229, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 1120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2335.0, "completions/mean_length": 1073.161865234375, "completions/mean_terminated_length": 596.6937866210938, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 10.475801749271136, "grad_norm": 0.14503762125968933, "learning_rate": 1e-06, "loss": -0.0713, "num_tokens": 659731205.0, "reward": 0.6618303656578064, "reward_std": 0.1402692198753357, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 1121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3090.0, "completions/mean_length": 987.4609985351562, "completions/mean_terminated_length": 561.41748046875, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 10.485131195335278, "grad_norm": 0.17859601974487305, "learning_rate": 1e-06, "loss": -0.074, "num_tokens": 660254234.0, "reward": 0.7332589626312256, "reward_std": 0.14316701889038086, "rewards/verify_math_reward/mean": 0.7332589030265808, "rewards/verify_math_reward/std": 0.4425029158592224, "step": 1122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3738.0, "completions/mean_length": 976.4375610351562, "completions/mean_terminated_length": 615.1431884765625, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 10.494460641399417, "grad_norm": 0.1336556077003479, "learning_rate": 1e-06, "loss": -0.0693, "num_tokens": 660832290.0, "reward": 0.6930803656578064, "reward_std": 0.1184028759598732, "rewards/verify_math_reward/mean": 0.6930803656578064, "rewards/verify_math_reward/std": 0.46147337555885315, "step": 1123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3536.0, "completions/mean_length": 992.0748291015625, "completions/mean_terminated_length": 566.6636962890625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 10.503790087463557, "grad_norm": 0.15979568660259247, "learning_rate": 1e-06, "loss": -0.0559, "num_tokens": 661364725.0, "reward": 0.6718750596046448, "reward_std": 0.1386127471923828, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3947.0, "completions/mean_length": 911.8973388671875, "completions/mean_terminated_length": 565.1138305664062, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 10.513119533527696, "grad_norm": 0.13665249943733215, "learning_rate": 1e-06, "loss": -0.0603, "num_tokens": 661901561.0, "reward": 0.7120535969734192, "reward_std": 0.13264445960521698, "rewards/verify_math_reward/mean": 0.7120535969734192, "rewards/verify_math_reward/std": 0.4530589282512665, "step": 1125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 977.8359985351562, "completions/mean_terminated_length": 563.9203491210938, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 10.522448979591836, "grad_norm": 0.16451455652713776, "learning_rate": 1e-06, "loss": -0.0352, "num_tokens": 662453406.0, "reward": 0.6941964626312256, "reward_std": 0.12163377553224564, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.46100425720214844, "step": 1126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2797.0, "completions/mean_length": 929.3739013671875, "completions/mean_terminated_length": 536.0288696289062, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 10.531778425655977, "grad_norm": 0.16072551906108856, "learning_rate": 1e-06, "loss": -0.0569, "num_tokens": 662972461.0, "reward": 0.7031250596046448, "reward_std": 0.13929423689842224, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 1127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3121.0, "completions/mean_length": 1076.204345703125, "completions/mean_terminated_length": 540.4979858398438, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 10.541107871720117, "grad_norm": 0.1522071361541748, "learning_rate": 1e-06, "loss": -0.061, "num_tokens": 663485268.0, "reward": 0.6741071939468384, "reward_std": 0.13711389899253845, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.46896928548812866, "step": 1128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3468.0, "completions/mean_length": 970.818115234375, "completions/mean_terminated_length": 564.8991088867188, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 10.550437317784256, "grad_norm": 0.18777716159820557, "learning_rate": 1e-06, "loss": -0.06, "num_tokens": 664022017.0, "reward": 0.6696428656578064, "reward_std": 0.1272311508655548, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 1129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 964.97216796875, "completions/mean_terminated_length": 597.9937744140625, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 10.559766763848396, "grad_norm": 0.1461143046617508, "learning_rate": 1e-06, "loss": -0.0512, "num_tokens": 664588520.0, "reward": 0.723214328289032, "reward_std": 0.13519570231437683, "rewards/verify_math_reward/mean": 0.7232142686843872, "rewards/verify_math_reward/std": 0.44765952229499817, "step": 1130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3775.0, "completions/mean_length": 907.4029541015625, "completions/mean_terminated_length": 568.8605346679688, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 10.569096209912537, "grad_norm": 0.1546100229024887, "learning_rate": 1e-06, "loss": -0.0612, "num_tokens": 665129761.0, "reward": 0.6886160969734192, "reward_std": 0.10772736370563507, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331799030303955, "step": 1131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1674107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 1245.7489013671875, "completions/mean_terminated_length": 672.64208984375, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 10.578425655976677, "grad_norm": 0.17100751399993896, "learning_rate": 1e-06, "loss": -0.0992, "num_tokens": 665725400.0, "reward": 0.6305803656578064, "reward_std": 0.1565767079591751, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 1132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3373.0, "completions/mean_length": 957.8136596679688, "completions/mean_terminated_length": 607.395751953125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 10.587755102040816, "grad_norm": 0.1777399778366089, "learning_rate": 1e-06, "loss": -0.0276, "num_tokens": 666313881.0, "reward": 0.6741071939468384, "reward_std": 0.14007559418678284, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 1133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4002.0, "completions/mean_length": 1098.4967041015625, "completions/mean_terminated_length": 634.9652099609375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 10.597084548104956, "grad_norm": 0.1605757772922516, "learning_rate": 1e-06, "loss": -0.0571, "num_tokens": 666896446.0, "reward": 0.6238839626312256, "reward_std": 0.16044881939888, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.48468026518821716, "step": 1134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3384.0, "completions/mean_length": 976.9855346679688, "completions/mean_terminated_length": 580.7333374023438, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 10.606413994169095, "grad_norm": 0.17219507694244385, "learning_rate": 1e-06, "loss": -0.0469, "num_tokens": 667443337.0, "reward": 0.7254464626312256, "reward_std": 0.14037981629371643, "rewards/verify_math_reward/mean": 0.7254464030265808, "rewards/verify_math_reward/std": 0.4465382993221283, "step": 1135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2925.0, "completions/mean_length": 1091.5045166015625, "completions/mean_terminated_length": 586.1851196289062, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 10.615743440233237, "grad_norm": 0.15457606315612793, "learning_rate": 1e-06, "loss": -0.0748, "num_tokens": 667980669.0, "reward": 0.660714328289032, "reward_std": 0.14992374181747437, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 1136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1551339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2700.0, "completions/mean_length": 1113.646240234375, "completions/mean_terminated_length": 566.0277709960938, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 10.625072886297376, "grad_norm": 0.1607721745967865, "learning_rate": 1e-06, "loss": -0.0575, "num_tokens": 668505952.0, "reward": 0.6662946939468384, "reward_std": 0.13632294535636902, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 1137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4069.0, "completions/mean_length": 918.1785888671875, "completions/mean_terminated_length": 589.4384155273438, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 10.634402332361516, "grad_norm": 0.15498511493206024, "learning_rate": 1e-06, "loss": -0.0545, "num_tokens": 669076512.0, "reward": 0.7276785969734192, "reward_std": 0.12373882532119751, "rewards/verify_math_reward/mean": 0.7276785969734192, "rewards/verify_math_reward/std": 0.4454030692577362, "step": 1138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2971.0, "completions/mean_length": 975.7857666015625, "completions/mean_terminated_length": 588.2057495117188, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 10.643731778425655, "grad_norm": 0.14636258780956268, "learning_rate": 1e-06, "loss": -0.0479, "num_tokens": 669622448.0, "reward": 0.6718750596046448, "reward_std": 0.11126275360584259, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3205.0, "completions/mean_length": 1157.4107666015625, "completions/mean_terminated_length": 636.1103515625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 10.653061224489797, "grad_norm": 0.1539439857006073, "learning_rate": 1e-06, "loss": -0.058, "num_tokens": 670205104.0, "reward": 0.6339285969734192, "reward_std": 0.14914487302303314, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 1140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1473214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3847.0, "completions/mean_length": 1152.1160888671875, "completions/mean_terminated_length": 643.4869384765625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 10.662390670553936, "grad_norm": 0.12876634299755096, "learning_rate": 1e-06, "loss": -0.0878, "num_tokens": 670780664.0, "reward": 0.6428571939468384, "reward_std": 0.13609471917152405, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 1141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2911.0, "completions/mean_length": 1046.185302734375, "completions/mean_terminated_length": 610.4974365234375, "completions/min_length": 184.0, "completions/min_terminated_length": 184.0, "epoch": 10.671720116618076, "grad_norm": 0.15197999775409698, "learning_rate": 1e-06, "loss": -0.0876, "num_tokens": 671353886.0, "reward": 0.715401828289032, "reward_std": 0.1395556628704071, "rewards/verify_math_reward/mean": 0.7154017686843872, "rewards/verify_math_reward/std": 0.4514748752117157, "step": 1142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3132.0, "completions/mean_length": 1013.1116333007812, "completions/mean_terminated_length": 545.5269775390625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 10.681049562682215, "grad_norm": 0.15575233101844788, "learning_rate": 1e-06, "loss": -0.0595, "num_tokens": 671877178.0, "reward": 0.6941964626312256, "reward_std": 0.12102645635604858, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.4610042870044708, "step": 1143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4001.0, "completions/mean_length": 1055.6585693359375, "completions/mean_terminated_length": 585.5025634765625, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 10.690379008746355, "grad_norm": 0.1572081446647644, "learning_rate": 1e-06, "loss": -0.0793, "num_tokens": 672421184.0, "reward": 0.6540178656578064, "reward_std": 0.15962213277816772, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2792.0, "completions/mean_length": 1030.509033203125, "completions/mean_terminated_length": 579.1242065429688, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 10.699708454810496, "grad_norm": 0.18195772171020508, "learning_rate": 1e-06, "loss": -0.0902, "num_tokens": 672962536.0, "reward": 0.6696428656578064, "reward_std": 0.14579172432422638, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 1145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3729.0, "completions/mean_length": 1003.8683471679688, "completions/mean_terminated_length": 611.0314331054688, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 10.709037900874636, "grad_norm": 0.15055854618549347, "learning_rate": 1e-06, "loss": -0.0471, "num_tokens": 673530002.0, "reward": 0.6640625, "reward_std": 0.13470645248889923, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1395089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3551.0, "completions/mean_length": 1094.1551513671875, "completions/mean_terminated_length": 607.4747314453125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 10.718367346938775, "grad_norm": 0.13042044639587402, "learning_rate": 1e-06, "loss": -0.0335, "num_tokens": 674088045.0, "reward": 0.6964285969734192, "reward_std": 0.09532869607210159, "rewards/verify_math_reward/mean": 0.6964285969734192, "rewards/verify_math_reward/std": 0.4600565731525421, "step": 1147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2898.0, "completions/mean_length": 1082.0, "completions/mean_terminated_length": 611.4271240234375, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 10.727696793002915, "grad_norm": 0.1522504836320877, "learning_rate": 1e-06, "loss": -0.075, "num_tokens": 674647589.0, "reward": 0.6540178656578064, "reward_std": 0.14687760174274445, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2928.0, "completions/mean_length": 1041.966552734375, "completions/mean_terminated_length": 632.184814453125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 10.737026239067056, "grad_norm": 0.15824255347251892, "learning_rate": 1e-06, "loss": -0.1141, "num_tokens": 675232543.0, "reward": 0.707589328289032, "reward_std": 0.17362776398658752, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 1149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3480.0, "completions/mean_length": 1091.946533203125, "completions/mean_terminated_length": 636.3187866210938, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 10.746355685131196, "grad_norm": 0.14918987452983856, "learning_rate": 1e-06, "loss": -0.0837, "num_tokens": 675818879.0, "reward": 0.6205357313156128, "reward_std": 0.174875870347023, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 1150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 1094.774658203125, "completions/mean_terminated_length": 621.7131958007812, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 10.755685131195335, "grad_norm": 0.15345510840415955, "learning_rate": 1e-06, "loss": -0.0424, "num_tokens": 676390605.0, "reward": 0.6183035969734192, "reward_std": 0.13902321457862854, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 1151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3995.0, "completions/mean_length": 1161.8660888671875, "completions/mean_terminated_length": 613.9019775390625, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 10.765014577259475, "grad_norm": 0.14456294476985931, "learning_rate": 1e-06, "loss": -0.0509, "num_tokens": 676948021.0, "reward": 0.6383928656578064, "reward_std": 0.11062336713075638, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 1152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3539.0, "completions/mean_length": 989.404052734375, "completions/mean_terminated_length": 581.4671630859375, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 10.774344023323614, "grad_norm": 0.13233256340026855, "learning_rate": 1e-06, "loss": -0.0837, "num_tokens": 677499959.0, "reward": 0.6964285969734192, "reward_std": 0.12636421620845795, "rewards/verify_math_reward/mean": 0.6964285969734192, "rewards/verify_math_reward/std": 0.4600565731525421, "step": 1153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3740.0, "completions/mean_length": 1012.05029296875, "completions/mean_terminated_length": 624.6193237304688, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 10.783673469387756, "grad_norm": 0.16096143424510956, "learning_rate": 1e-06, "loss": -0.0545, "num_tokens": 678085556.0, "reward": 0.707589328289032, "reward_std": 0.1534174531698227, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 1154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3837.0, "completions/mean_length": 961.6808471679688, "completions/mean_terminated_length": 603.02734375, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 10.793002915451895, "grad_norm": 0.14721722900867462, "learning_rate": 1e-06, "loss": -0.0535, "num_tokens": 678657694.0, "reward": 0.6383928656578064, "reward_std": 0.13729682564735413, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 1155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3112.0, "completions/mean_length": 1073.1953125, "completions/mean_terminated_length": 587.6670532226562, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 10.802332361516035, "grad_norm": 0.15671247243881226, "learning_rate": 1e-06, "loss": -0.0454, "num_tokens": 679202237.0, "reward": 0.645089328289032, "reward_std": 0.1281326860189438, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 1156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3751.0, "completions/mean_length": 1038.360595703125, "completions/mean_terminated_length": 583.6346435546875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 10.811661807580174, "grad_norm": 0.18394167721271515, "learning_rate": 1e-06, "loss": -0.102, "num_tokens": 679755992.0, "reward": 0.6774553656578064, "reward_std": 0.15533748269081116, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 1157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2841.0, "completions/mean_length": 1051.384033203125, "completions/mean_terminated_length": 580.5670166015625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 10.820991253644316, "grad_norm": 0.16373591125011444, "learning_rate": 1e-06, "loss": -0.044, "num_tokens": 680296296.0, "reward": 0.6796875596046448, "reward_std": 0.13203756511211395, "rewards/verify_math_reward/mean": 0.6796875, "rewards/verify_math_reward/std": 0.4668572247028351, "step": 1158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3278.0, "completions/mean_length": 790.7199096679688, "completions/mean_terminated_length": 549.2562866210938, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 10.830320699708455, "grad_norm": 0.15067870914936066, "learning_rate": 1e-06, "loss": -0.0323, "num_tokens": 680840077.0, "reward": 0.7812500596046448, "reward_std": 0.10919371992349625, "rewards/verify_math_reward/mean": 0.78125, "rewards/verify_math_reward/std": 0.41362953186035156, "step": 1159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3370.0, "completions/mean_length": 1111.4364013671875, "completions/mean_terminated_length": 604.9177856445312, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 10.839650145772595, "grad_norm": 0.1591438353061676, "learning_rate": 1e-06, "loss": -0.1107, "num_tokens": 681397252.0, "reward": 0.6651785969734192, "reward_std": 0.16288693249225616, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219157218933105, "step": 1160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4014.0, "completions/mean_length": 967.5558471679688, "completions/mean_terminated_length": 570.1056518554688, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 10.848979591836734, "grad_norm": 0.17167727649211884, "learning_rate": 1e-06, "loss": -0.0579, "num_tokens": 681932534.0, "reward": 0.715401828289032, "reward_std": 0.16150008141994476, "rewards/verify_math_reward/mean": 0.7154017686843872, "rewards/verify_math_reward/std": 0.4514748752117157, "step": 1161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3729.0, "completions/mean_length": 932.87841796875, "completions/mean_terminated_length": 618.5067749023438, "completions/min_length": 196.0, "completions/min_terminated_length": 196.0, "epoch": 10.858309037900874, "grad_norm": 0.15950097143650055, "learning_rate": 1e-06, "loss": -0.0563, "num_tokens": 682524241.0, "reward": 0.6941964626312256, "reward_std": 0.15431900322437286, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.4610042870044708, "step": 1162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3398.0, "completions/mean_length": 822.8873291015625, "completions/mean_terminated_length": 571.109375, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 10.867638483965015, "grad_norm": 0.1505705863237381, "learning_rate": 1e-06, "loss": -0.037, "num_tokens": 683089084.0, "reward": 0.7265625596046448, "reward_std": 0.14496758580207825, "rewards/verify_math_reward/mean": 0.7265625, "rewards/verify_math_reward/std": 0.4459724426269531, "step": 1163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4044.0, "completions/mean_length": 1105.24560546875, "completions/mean_terminated_length": 629.3557739257812, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 10.876967930029155, "grad_norm": 0.1730506718158722, "learning_rate": 1e-06, "loss": -0.032, "num_tokens": 683657912.0, "reward": 0.645089328289032, "reward_std": 0.12839369475841522, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 1164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3497.0, "completions/mean_length": 1048.118408203125, "completions/mean_terminated_length": 581.3256225585938, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 10.886297376093294, "grad_norm": 0.15575364232063293, "learning_rate": 1e-06, "loss": -0.0605, "num_tokens": 684194818.0, "reward": 0.7053571939468384, "reward_std": 0.12790516018867493, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613667368888855, "step": 1165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3066.0, "completions/mean_length": 960.5100708007812, "completions/mean_terminated_length": 588.6354370117188, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 10.895626822157434, "grad_norm": 0.152786985039711, "learning_rate": 1e-06, "loss": -0.0338, "num_tokens": 684754787.0, "reward": 0.7433035969734192, "reward_std": 0.11073465645313263, "rewards/verify_math_reward/mean": 0.7433035969734192, "rewards/verify_math_reward/std": 0.43705442547798157, "step": 1166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2268.0, "completions/mean_length": 920.9933471679688, "completions/mean_terminated_length": 575.2005004882812, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 10.904956268221575, "grad_norm": 0.16257745027542114, "learning_rate": 1e-06, "loss": -0.0529, "num_tokens": 685307397.0, "reward": 0.7399553656578064, "reward_std": 0.14091001451015472, "rewards/verify_math_reward/mean": 0.7399553656578064, "rewards/verify_math_reward/std": 0.43890365958213806, "step": 1167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2327.0, "completions/mean_length": 924.6094360351562, "completions/mean_terminated_length": 512.6885375976562, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 10.914285714285715, "grad_norm": 0.16915538907051086, "learning_rate": 1e-06, "loss": -0.0461, "num_tokens": 685805711.0, "reward": 0.7254464626312256, "reward_std": 0.12343572080135345, "rewards/verify_math_reward/mean": 0.7254464030265808, "rewards/verify_math_reward/std": 0.4465382993221283, "step": 1168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1484375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2432.0, "completions/mean_length": 1178.3638916015625, "completions/mean_terminated_length": 669.7850952148438, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 10.923615160349854, "grad_norm": 0.17527371644973755, "learning_rate": 1e-06, "loss": -0.0376, "num_tokens": 686402853.0, "reward": 0.6026785969734192, "reward_std": 0.17652417719364166, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 1169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3981.0, "completions/mean_length": 988.1629638671875, "completions/mean_terminated_length": 557.7255249023438, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 10.932944606413994, "grad_norm": 0.1803063005208969, "learning_rate": 1e-06, "loss": -0.0959, "num_tokens": 686933031.0, "reward": 0.6741071939468384, "reward_std": 0.17908315360546112, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 1170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3868.0, "completions/mean_length": 1003.6473388671875, "completions/mean_terminated_length": 606.3929443359375, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 10.942274052478133, "grad_norm": 0.16941961646080017, "learning_rate": 1e-06, "loss": -0.0566, "num_tokens": 687510931.0, "reward": 0.609375, "reward_std": 0.16378848254680634, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 1171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1517857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3778.0, "completions/mean_length": 1172.798095703125, "completions/mean_terminated_length": 649.69873046875, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 10.951603498542275, "grad_norm": 0.15194791555404663, "learning_rate": 1e-06, "loss": -0.0693, "num_tokens": 688095086.0, "reward": 0.606026828289032, "reward_std": 0.1280987709760666, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 1172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3887.0, "completions/mean_length": 1112.8226318359375, "completions/mean_terminated_length": 686.654296875, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 10.960932944606414, "grad_norm": 0.14911498129367828, "learning_rate": 1e-06, "loss": -0.0518, "num_tokens": 688714951.0, "reward": 0.6272321939468384, "reward_std": 0.17217238247394562, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 1173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3428.0, "completions/mean_length": 993.4163208007812, "completions/mean_terminated_length": 629.7718505859375, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 10.970262390670554, "grad_norm": 0.1688496470451355, "learning_rate": 1e-06, "loss": -0.0539, "num_tokens": 689305444.0, "reward": 0.6629464626312256, "reward_std": 0.1675853133201599, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 1174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3238.0, "completions/mean_length": 907.755615234375, "completions/mean_terminated_length": 624.958740234375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 10.979591836734693, "grad_norm": 0.1482262909412384, "learning_rate": 1e-06, "loss": -0.0588, "num_tokens": 689912785.0, "reward": 0.7220982313156128, "reward_std": 0.14905862510204315, "rewards/verify_math_reward/mean": 0.7220982313156128, "rewards/verify_math_reward/std": 0.44821488857269287, "step": 1175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3820.0, "completions/mean_length": 927.6172485351562, "completions/mean_terminated_length": 595.5425415039062, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 10.988921282798835, "grad_norm": 0.17880500853061676, "learning_rate": 1e-06, "loss": -0.0766, "num_tokens": 690485650.0, "reward": 0.7165178656578064, "reward_std": 0.1725853532552719, "rewards/verify_math_reward/mean": 0.7165178656578064, "rewards/verify_math_reward/std": 0.4509401023387909, "step": 1176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09659090909090906, "completions/max_length": 4096.0, "completions/max_terminated_length": 3607.0, "completions/mean_length": 1018.5540161132812, "completions/mean_terminated_length": 689.5188598632812, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 10.998250728862974, "grad_norm": 0.16327017545700073, "learning_rate": 1e-06, "loss": -0.0616, "num_tokens": 691083697.0, "reward": 0.6283482313156128, "reward_std": 0.13665924966335297, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 1177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2466.0, "completions/mean_length": 1012.38623046875, "completions/mean_terminated_length": 624.9974975585938, "completions/min_length": 202.0, "completions/min_terminated_length": 202.0, "epoch": 11.00932944606414, "grad_norm": 0.15496833622455597, "learning_rate": 1e-06, "loss": -0.0673, "num_tokens": 691674891.0, "reward": 0.7031250596046448, "reward_std": 0.13673663139343262, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 1178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3647.0, "completions/mean_length": 1168.1015625, "completions/mean_terminated_length": 675.6649169921875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 11.018658892128279, "grad_norm": 0.14826743304729462, "learning_rate": 1e-06, "loss": -0.0581, "num_tokens": 692284598.0, "reward": 0.6316964626312256, "reward_std": 0.15165013074874878, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 1179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3563.0, "completions/mean_length": 1042.72998046875, "completions/mean_terminated_length": 566.0258178710938, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 11.02798833819242, "grad_norm": 0.18478870391845703, "learning_rate": 1e-06, "loss": -0.0947, "num_tokens": 692804636.0, "reward": 0.6640625, "reward_std": 0.1838906854391098, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3737.0, "completions/mean_length": 1021.7288208007812, "completions/mean_terminated_length": 573.5614013671875, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 11.03731778425656, "grad_norm": 0.1643456667661667, "learning_rate": 1e-06, "loss": -0.0697, "num_tokens": 693339313.0, "reward": 0.6819196939468384, "reward_std": 0.1612725704908371, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 1181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3494.0, "completions/mean_length": 1022.122802734375, "completions/mean_terminated_length": 627.2417602539062, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 11.0466472303207, "grad_norm": 0.15247948467731476, "learning_rate": 1e-06, "loss": -0.0473, "num_tokens": 693923807.0, "reward": 0.6696428656578064, "reward_std": 0.15157341957092285, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 1182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3656.0, "completions/mean_length": 1027.6607666015625, "completions/mean_terminated_length": 584.8480224609375, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 11.055976676384839, "grad_norm": 0.1399637758731842, "learning_rate": 1e-06, "loss": -0.0477, "num_tokens": 694471127.0, "reward": 0.6975446939468384, "reward_std": 0.11753343045711517, "rewards/verify_math_reward/mean": 0.6975446343421936, "rewards/verify_math_reward/std": 0.45957788825035095, "step": 1183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2712.0, "completions/mean_length": 1075.77685546875, "completions/mean_terminated_length": 635.4884643554688, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 11.06530612244898, "grad_norm": 0.16120658814907074, "learning_rate": 1e-06, "loss": -0.0507, "num_tokens": 695064015.0, "reward": 0.6595982313156128, "reward_std": 0.1583426594734192, "rewards/verify_math_reward/mean": 0.6595982313156128, "rewards/verify_math_reward/std": 0.4741089344024658, "step": 1184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1462053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2909.0, "completions/mean_length": 1113.318115234375, "completions/mean_terminated_length": 602.5581665039062, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 11.07463556851312, "grad_norm": 0.17223554849624634, "learning_rate": 1e-06, "loss": -0.1264, "num_tokens": 695614524.0, "reward": 0.6428571939468384, "reward_std": 0.16660960018634796, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 1185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1618303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3482.0, "completions/mean_length": 1174.4241943359375, "completions/mean_terminated_length": 610.3381958007812, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 11.08396501457726, "grad_norm": 0.15575699508190155, "learning_rate": 1e-06, "loss": -0.0596, "num_tokens": 696167816.0, "reward": 0.6194196939468384, "reward_std": 0.11054850369691849, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2536.0, "completions/mean_length": 978.62841796875, "completions/mean_terminated_length": 560.3480834960938, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 11.093294460641399, "grad_norm": 0.1487502008676529, "learning_rate": 1e-06, "loss": -0.0602, "num_tokens": 696703563.0, "reward": 0.684151828289032, "reward_std": 0.13256961107254028, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 1187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3119.0, "completions/mean_length": 1066.2098388671875, "completions/mean_terminated_length": 628.9603881835938, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 11.102623906705539, "grad_norm": 0.1588362604379654, "learning_rate": 1e-06, "loss": -0.0894, "num_tokens": 697296047.0, "reward": 0.6395089626312256, "reward_std": 0.1698404997587204, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111123085022, "step": 1188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 950.0469360351562, "completions/mean_terminated_length": 541.4299926757812, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 11.11195335276968, "grad_norm": 0.15742437541484833, "learning_rate": 1e-06, "loss": -0.0566, "num_tokens": 697800745.0, "reward": 0.7053571939468384, "reward_std": 0.11498401314020157, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613664388656616, "step": 1189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1997767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3760.0, "completions/mean_length": 1293.1741943359375, "completions/mean_terminated_length": 593.4448852539062, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 11.12128279883382, "grad_norm": 0.14927829802036285, "learning_rate": 1e-06, "loss": -0.0831, "num_tokens": 698321021.0, "reward": 0.640625, "reward_std": 0.11742536723613739, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 1190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3805.0, "completions/mean_length": 994.3973388671875, "completions/mean_terminated_length": 551.3112182617188, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 11.130612244897959, "grad_norm": 0.12305353581905365, "learning_rate": 1e-06, "loss": -0.0703, "num_tokens": 698830321.0, "reward": 0.6975446939468384, "reward_std": 0.0839071124792099, "rewards/verify_math_reward/mean": 0.6975446343421936, "rewards/verify_math_reward/std": 0.45957788825035095, "step": 1191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1662946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2690.0, "completions/mean_length": 1222.7801513671875, "completions/mean_terminated_length": 649.6746826171875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 11.139941690962099, "grad_norm": 0.1583167016506195, "learning_rate": 1e-06, "loss": -0.052, "num_tokens": 699418660.0, "reward": 0.5558035969734192, "reward_std": 0.13658326864242554, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2730.0, "completions/mean_length": 1045.46875, "completions/mean_terminated_length": 587.3016967773438, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 11.14927113702624, "grad_norm": 0.1539103090763092, "learning_rate": 1e-06, "loss": -0.0591, "num_tokens": 699960192.0, "reward": 0.6852678656578064, "reward_std": 0.13534656167030334, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 1193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3318.0, "completions/mean_length": 1040.3426513671875, "completions/mean_terminated_length": 590.4058837890625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 11.15860058309038, "grad_norm": 0.26502928137779236, "learning_rate": 1e-06, "loss": -0.0305, "num_tokens": 700505795.0, "reward": 0.7332589626312256, "reward_std": 0.12084423005580902, "rewards/verify_math_reward/mean": 0.7332589030265808, "rewards/verify_math_reward/std": 0.4425029158592224, "step": 1194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3942.0, "completions/mean_length": 1070.2757568359375, "completions/mean_terminated_length": 556.7715454101562, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 11.167930029154519, "grad_norm": 0.14310236275196075, "learning_rate": 1e-06, "loss": -0.0797, "num_tokens": 701034250.0, "reward": 0.6629464626312256, "reward_std": 0.12467243522405624, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 1195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1674107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3851.0, "completions/mean_length": 1199.85498046875, "completions/mean_terminated_length": 617.5201416015625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 11.177259475218658, "grad_norm": 0.16276240348815918, "learning_rate": 1e-06, "loss": -0.1077, "num_tokens": 701573664.0, "reward": 0.6796875596046448, "reward_std": 0.16055506467819214, "rewards/verify_math_reward/mean": 0.6796875, "rewards/verify_math_reward/std": 0.4668572247028351, "step": 1196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3972.0, "completions/mean_length": 922.1563110351562, "completions/mean_terminated_length": 541.2949829101562, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 11.186588921282798, "grad_norm": 0.1686992347240448, "learning_rate": 1e-06, "loss": -0.0853, "num_tokens": 702093356.0, "reward": 0.7042410969734192, "reward_std": 0.1265924572944641, "rewards/verify_math_reward/mean": 0.7042410969734192, "rewards/verify_math_reward/std": 0.45663803815841675, "step": 1197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3806.0, "completions/mean_length": 1037.734375, "completions/mean_terminated_length": 600.8392944335938, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 11.19591836734694, "grad_norm": 0.13966944813728333, "learning_rate": 1e-06, "loss": -0.054, "num_tokens": 702648830.0, "reward": 0.6886160969734192, "reward_std": 0.13940481841564178, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 1198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2947.0, "completions/mean_length": 1110.55810546875, "completions/mean_terminated_length": 553.0119018554688, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 11.205247813411079, "grad_norm": 0.14603488147258759, "learning_rate": 1e-06, "loss": -0.0722, "num_tokens": 703162066.0, "reward": 0.6551339626312256, "reward_std": 0.11460494995117188, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900800228119, "step": 1199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2967.0, "completions/mean_length": 1005.5547485351562, "completions/mean_terminated_length": 590.8873291015625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 11.214577259475218, "grad_norm": 0.3345600962638855, "learning_rate": 1e-06, "loss": -0.0945, "num_tokens": 703724075.0, "reward": 0.6908482313156128, "reward_std": 0.1744275689125061, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 1200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4008.0, "completions/mean_length": 1078.5648193359375, "completions/mean_terminated_length": 611.9509887695312, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 11.223906705539358, "grad_norm": 0.2436329424381256, "learning_rate": 1e-06, "loss": -0.0624, "num_tokens": 704281645.0, "reward": 0.6551339626312256, "reward_std": 0.1331976056098938, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 1201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2771.0, "completions/mean_length": 1040.2578125, "completions/mean_terminated_length": 608.1719970703125, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 11.2332361516035, "grad_norm": 0.14937366545200348, "learning_rate": 1e-06, "loss": -0.0556, "num_tokens": 704846148.0, "reward": 0.7098214626312256, "reward_std": 0.09528662264347076, "rewards/verify_math_reward/mean": 0.7098214030265808, "rewards/verify_math_reward/std": 0.454098105430603, "step": 1202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3325.0, "completions/mean_length": 1042.2545166015625, "completions/mean_terminated_length": 551.7564697265625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 11.242565597667639, "grad_norm": 0.14984160661697388, "learning_rate": 1e-06, "loss": -0.0645, "num_tokens": 705358536.0, "reward": 0.7332589626312256, "reward_std": 0.11201275140047073, "rewards/verify_math_reward/mean": 0.7332589030265808, "rewards/verify_math_reward/std": 0.4425029158592224, "step": 1203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3981.0, "completions/mean_length": 975.1629638671875, "completions/mean_terminated_length": 583.0979614257812, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 11.251895043731778, "grad_norm": 0.15446537733078003, "learning_rate": 1e-06, "loss": -0.0677, "num_tokens": 705915834.0, "reward": 0.738839328289032, "reward_std": 0.1284346580505371, "rewards/verify_math_reward/mean": 0.7388392686843872, "rewards/verify_math_reward/std": 0.439512699842453, "step": 1204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4024.0, "completions/mean_length": 1101.9832763671875, "completions/mean_terminated_length": 652.3042602539062, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 11.261224489795918, "grad_norm": 0.177810400724411, "learning_rate": 1e-06, "loss": -0.0713, "num_tokens": 706533539.0, "reward": 0.6082589626312256, "reward_std": 0.14481674134731293, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.4884119927883148, "step": 1205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2734.0, "completions/mean_length": 907.114990234375, "completions/mean_terminated_length": 542.2176513671875, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 11.270553935860057, "grad_norm": 0.17740251123905182, "learning_rate": 1e-06, "loss": -0.0433, "num_tokens": 707060418.0, "reward": 0.684151828289032, "reward_std": 0.1470380276441574, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 1206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3246.0, "completions/mean_length": 1018.427490234375, "completions/mean_terminated_length": 583.2547607421875, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 11.279883381924199, "grad_norm": 0.161585733294487, "learning_rate": 1e-06, "loss": -0.0692, "num_tokens": 707604401.0, "reward": 0.6852678656578064, "reward_std": 0.16239726543426514, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 1207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3945.0, "completions/mean_length": 972.3750610351562, "completions/mean_terminated_length": 566.6582641601562, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 11.289212827988338, "grad_norm": 0.14691539108753204, "learning_rate": 1e-06, "loss": -0.0569, "num_tokens": 708146729.0, "reward": 0.668526828289032, "reward_std": 0.12181740999221802, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056781768799, "step": 1208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2608.0, "completions/mean_length": 923.67529296875, "completions/mean_terminated_length": 556.2702026367188, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 11.298542274052478, "grad_norm": 0.15742585062980652, "learning_rate": 1e-06, "loss": -0.0696, "num_tokens": 708681102.0, "reward": 0.691964328289032, "reward_std": 0.12043306231498718, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 1209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3961.0, "completions/mean_length": 906.2366333007812, "completions/mean_terminated_length": 550.0595703125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 11.307871720116617, "grad_norm": 0.16292087733745575, "learning_rate": 1e-06, "loss": -0.049, "num_tokens": 709203706.0, "reward": 0.7421875596046448, "reward_std": 0.10990910232067108, "rewards/verify_math_reward/mean": 0.7421875, "rewards/verify_math_reward/std": 0.43767455220222473, "step": 1210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1841517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2962.0, "completions/mean_length": 1280.727783203125, "completions/mean_terminated_length": 645.26953125, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 11.317201166180759, "grad_norm": 0.1581532210111618, "learning_rate": 1e-06, "loss": -0.0836, "num_tokens": 709775614.0, "reward": 0.629464328289032, "reward_std": 0.13444501161575317, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 1211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3938.0, "completions/mean_length": 1014.2578735351562, "completions/mean_terminated_length": 560.4801635742188, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 11.326530612244898, "grad_norm": 0.15922218561172485, "learning_rate": 1e-06, "loss": -0.0763, "num_tokens": 710299853.0, "reward": 0.6830357313156128, "reward_std": 0.13616888225078583, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2643.0, "completions/mean_length": 932.7578735351562, "completions/mean_terminated_length": 553.1687622070312, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 11.335860058309038, "grad_norm": 0.1400115191936493, "learning_rate": 1e-06, "loss": -0.0186, "num_tokens": 710819716.0, "reward": 0.7243303656578064, "reward_std": 0.10092677175998688, "rewards/verify_math_reward/mean": 0.7243303656578064, "rewards/verify_math_reward/std": 0.4471006691455841, "step": 1213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3967.0, "completions/mean_length": 1073.798095703125, "completions/mean_terminated_length": 560.8916625976562, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 11.345189504373177, "grad_norm": 0.17650899291038513, "learning_rate": 1e-06, "loss": -0.0366, "num_tokens": 711337575.0, "reward": 0.6462053656578064, "reward_std": 0.12009353935718536, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 1214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2247.0, "completions/mean_length": 1052.0670166015625, "completions/mean_terminated_length": 630.4802856445312, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 11.354518950437317, "grad_norm": 0.15283583104610443, "learning_rate": 1e-06, "loss": -0.0726, "num_tokens": 711928219.0, "reward": 0.7053571939468384, "reward_std": 0.1345216929912567, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613667368888855, "step": 1215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1395089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2247.0, "completions/mean_length": 1090.2254638671875, "completions/mean_terminated_length": 602.9078979492188, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 11.363848396501458, "grad_norm": 0.15655484795570374, "learning_rate": 1e-06, "loss": -0.0574, "num_tokens": 712474885.0, "reward": 0.6573660969734192, "reward_std": 0.11960610747337341, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485536336898804, "step": 1216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4002.0, "completions/mean_length": 1033.4888916015625, "completions/mean_terminated_length": 595.9872436523438, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 11.373177842565598, "grad_norm": 0.1904921531677246, "learning_rate": 1e-06, "loss": -0.0602, "num_tokens": 713042387.0, "reward": 0.6830357313156128, "reward_std": 0.17964698374271393, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2593.0, "completions/mean_length": 1093.075927734375, "completions/mean_terminated_length": 601.6882934570312, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 11.382507288629737, "grad_norm": 0.14345112442970276, "learning_rate": 1e-06, "loss": -0.0723, "num_tokens": 713600703.0, "reward": 0.7131696939468384, "reward_std": 0.11283759027719498, "rewards/verify_math_reward/mean": 0.7131696343421936, "rewards/verify_math_reward/std": 0.4525342881679535, "step": 1218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 959.4654541015625, "completions/mean_terminated_length": 574.2769165039062, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 11.391836734693877, "grad_norm": 0.164277583360672, "learning_rate": 1e-06, "loss": -0.0468, "num_tokens": 714149000.0, "reward": 0.6863839626312256, "reward_std": 0.14120283722877502, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422141790390015, "step": 1219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3164.0, "completions/mean_length": 966.4576416015625, "completions/mean_terminated_length": 595.2883911132812, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 11.401166180758018, "grad_norm": 0.19730933010578156, "learning_rate": 1e-06, "loss": -0.0436, "num_tokens": 714713730.0, "reward": 0.6975446939468384, "reward_std": 0.14147524535655975, "rewards/verify_math_reward/mean": 0.6975446343421936, "rewards/verify_math_reward/std": 0.45957788825035095, "step": 1220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3959.0, "completions/mean_length": 1147.0670166015625, "completions/mean_terminated_length": 623.931640625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 11.410495626822158, "grad_norm": 0.1630871444940567, "learning_rate": 1e-06, "loss": -0.0694, "num_tokens": 715270942.0, "reward": 0.6350446939468384, "reward_std": 0.14582450687885284, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 1221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3800.0, "completions/mean_length": 1077.9442138671875, "completions/mean_terminated_length": 624.6547241210938, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 11.419825072886297, "grad_norm": 0.16397812962532043, "learning_rate": 1e-06, "loss": -0.0779, "num_tokens": 715844940.0, "reward": 0.7142857313156128, "reward_std": 0.14751701056957245, "rewards/verify_math_reward/mean": 0.7142857313156128, "rewards/verify_math_reward/std": 0.4520062506198883, "step": 1222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 1096.1060791015625, "completions/mean_terminated_length": 693.588623046875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 11.429154518950437, "grad_norm": 0.1463952511548996, "learning_rate": 1e-06, "loss": -0.0609, "num_tokens": 716472979.0, "reward": 0.6707589626312256, "reward_std": 0.1424841582775116, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 1223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3269.0, "completions/mean_length": 945.107177734375, "completions/mean_terminated_length": 549.2662963867188, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 11.438483965014576, "grad_norm": 0.1774315983057022, "learning_rate": 1e-06, "loss": -0.0833, "num_tokens": 716994339.0, "reward": 0.723214328289032, "reward_std": 0.13444501161575317, "rewards/verify_math_reward/mean": 0.7232142686843872, "rewards/verify_math_reward/std": 0.44765952229499817, "step": 1224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1741071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2899.0, "completions/mean_length": 1220.46435546875, "completions/mean_terminated_length": 614.270263671875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 11.447813411078718, "grad_norm": 0.14416812360286713, "learning_rate": 1e-06, "loss": -0.052, "num_tokens": 717540859.0, "reward": 0.6361607313156128, "reward_std": 0.12058139592409134, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 1225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3193.0, "completions/mean_length": 1058.6004638671875, "completions/mean_terminated_length": 584.3742065429688, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 11.457142857142857, "grad_norm": 0.1534576416015625, "learning_rate": 1e-06, "loss": -0.0851, "num_tokens": 718070165.0, "reward": 0.6863839626312256, "reward_std": 0.15503577888011932, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422144770622253, "step": 1226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3743.0, "completions/mean_length": 996.9063110351562, "completions/mean_terminated_length": 563.1908569335938, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 11.466472303206997, "grad_norm": 0.15613774955272675, "learning_rate": 1e-06, "loss": -0.0739, "num_tokens": 718594841.0, "reward": 0.6941964626312256, "reward_std": 0.10713215172290802, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.4610042870044708, "step": 1227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3383.0, "completions/mean_length": 1134.29248046875, "completions/mean_terminated_length": 608.8909301757812, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 11.475801749271136, "grad_norm": 0.18411079049110413, "learning_rate": 1e-06, "loss": -0.0962, "num_tokens": 719142807.0, "reward": 0.6729910969734192, "reward_std": 0.1811119168996811, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 1228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1618303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3980.0, "completions/mean_length": 1106.0045166015625, "completions/mean_terminated_length": 528.7083740234375, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 11.485131195335278, "grad_norm": 0.17371106147766113, "learning_rate": 1e-06, "loss": -0.0519, "num_tokens": 719611939.0, "reward": 0.6774553656578064, "reward_std": 0.1385032683610916, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 1229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3494.0, "completions/mean_length": 1053.766845703125, "completions/mean_terminated_length": 636.8109130859375, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 11.494460641399417, "grad_norm": 0.16804476082324982, "learning_rate": 1e-06, "loss": -0.0669, "num_tokens": 720205602.0, "reward": 0.6897321939468384, "reward_std": 0.18280190229415894, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.4628615975379944, "step": 1230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1595982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3170.0, "completions/mean_length": 1140.96435546875, "completions/mean_terminated_length": 579.7822265625, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 11.503790087463557, "grad_norm": 0.18616646528244019, "learning_rate": 1e-06, "loss": -0.0912, "num_tokens": 720735442.0, "reward": 0.6651785969734192, "reward_std": 0.1310625821352005, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219160199165344, "step": 1231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2625.0, "completions/mean_length": 938.8114013671875, "completions/mean_terminated_length": 594.9591674804688, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 11.513119533527696, "grad_norm": 0.17584441602230072, "learning_rate": 1e-06, "loss": -0.0665, "num_tokens": 721307465.0, "reward": 0.645089328289032, "reward_std": 0.146052747964859, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 1232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2501.0, "completions/mean_length": 1074.7020263671875, "completions/mean_terminated_length": 580.3078002929688, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 11.522448979591836, "grad_norm": 0.15414857864379883, "learning_rate": 1e-06, "loss": -0.084, "num_tokens": 721836742.0, "reward": 0.6908482313156128, "reward_std": 0.1335773915052414, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 1233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 1039.44873046875, "completions/mean_terminated_length": 642.4439086914062, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 11.531778425655977, "grad_norm": 0.16479934751987457, "learning_rate": 1e-06, "loss": -0.097, "num_tokens": 722437680.0, "reward": 0.691964328289032, "reward_std": 0.17735788226127625, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 1234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2586.0, "completions/mean_length": 1177.0145263671875, "completions/mean_terminated_length": 636.4616088867188, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 11.541107871720117, "grad_norm": 0.15359684824943542, "learning_rate": 1e-06, "loss": -0.0975, "num_tokens": 723006973.0, "reward": 0.645089328289032, "reward_std": 0.13865482807159424, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 1235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3926.0, "completions/mean_length": 1101.485595703125, "completions/mean_terminated_length": 620.5012817382812, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 11.550437317784256, "grad_norm": 0.16850945353507996, "learning_rate": 1e-06, "loss": -0.0861, "num_tokens": 723570232.0, "reward": 0.6819196939468384, "reward_std": 0.16469958424568176, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 1236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1584821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3914.0, "completions/mean_length": 1160.8326416015625, "completions/mean_terminated_length": 608.0556640625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 11.559766763848396, "grad_norm": 0.1628613919019699, "learning_rate": 1e-06, "loss": -0.0777, "num_tokens": 724133754.0, "reward": 0.6662946939468384, "reward_std": 0.1304224729537964, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179922461509705, "step": 1237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4018.0, "completions/mean_length": 1143.20654296875, "completions/mean_terminated_length": 619.3862915039062, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 11.569096209912537, "grad_norm": 0.15765686333179474, "learning_rate": 1e-06, "loss": -0.0853, "num_tokens": 724693307.0, "reward": 0.6774553656578064, "reward_std": 0.14766854047775269, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 1238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2472.0, "completions/mean_length": 948.685302734375, "completions/mean_terminated_length": 588.5447387695312, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 11.578425655976677, "grad_norm": 0.15406832098960876, "learning_rate": 1e-06, "loss": -0.0617, "num_tokens": 725253945.0, "reward": 0.7332589626312256, "reward_std": 0.13970790803432465, "rewards/verify_math_reward/mean": 0.7332589030265808, "rewards/verify_math_reward/std": 0.4425028860569, "step": 1239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1484375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3812.0, "completions/mean_length": 1106.3616943359375, "completions/mean_terminated_length": 585.2319946289062, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 11.587755102040816, "grad_norm": 0.14483070373535156, "learning_rate": 1e-06, "loss": -0.0653, "num_tokens": 725794829.0, "reward": 0.6741071939468384, "reward_std": 0.09848611801862717, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 1240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1595982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 1157.888427734375, "completions/mean_terminated_length": 599.9203491210938, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 11.597084548104956, "grad_norm": 0.18773047626018524, "learning_rate": 1e-06, "loss": -0.065, "num_tokens": 726351737.0, "reward": 0.6328125, "reward_std": 0.15300628542900085, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 1241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4006.0, "completions/mean_length": 1102.5546875, "completions/mean_terminated_length": 608.1885375976562, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 11.606413994169095, "grad_norm": 0.16885830461978912, "learning_rate": 1e-06, "loss": -0.0714, "num_tokens": 726921914.0, "reward": 0.6160714626312256, "reward_std": 0.1453377902507782, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 1242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3845.0, "completions/mean_length": 1059.421875, "completions/mean_terminated_length": 651.9822998046875, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 11.615743440233237, "grad_norm": 0.14501583576202393, "learning_rate": 1e-06, "loss": -0.0432, "num_tokens": 727525540.0, "reward": 0.6785714626312256, "reward_std": 0.11561454832553864, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 1243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3266.0, "completions/mean_length": 1040.430908203125, "completions/mean_terminated_length": 599.4610595703125, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 11.625072886297376, "grad_norm": 0.1352197825908661, "learning_rate": 1e-06, "loss": -0.0927, "num_tokens": 728083126.0, "reward": 0.7611607313156128, "reward_std": 0.1000591367483139, "rewards/verify_math_reward/mean": 0.7611607313156128, "rewards/verify_math_reward/std": 0.4266124963760376, "step": 1244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 1103.1328125, "completions/mean_terminated_length": 608.8621826171875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 11.634402332361516, "grad_norm": 0.16308481991291046, "learning_rate": 1e-06, "loss": -0.0698, "num_tokens": 728639013.0, "reward": 0.660714328289032, "reward_std": 0.1360626220703125, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 1245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3524.0, "completions/mean_length": 1109.404052734375, "completions/mean_terminated_length": 611.6380615234375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 11.643731778425655, "grad_norm": 0.1664920151233673, "learning_rate": 1e-06, "loss": -0.0631, "num_tokens": 729197095.0, "reward": 0.6674107313156128, "reward_std": 0.14165747165679932, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 1246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 1073.8046875, "completions/mean_terminated_length": 592.9120483398438, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 11.653061224489797, "grad_norm": 0.1885533183813095, "learning_rate": 1e-06, "loss": -0.0947, "num_tokens": 729744600.0, "reward": 0.6830357313156128, "reward_std": 0.1555236279964447, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2721.0, "completions/mean_length": 1117.083740234375, "completions/mean_terminated_length": 588.62939453125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 11.662390670553936, "grad_norm": 0.18340948224067688, "learning_rate": 1e-06, "loss": -0.0962, "num_tokens": 730272659.0, "reward": 0.6830357313156128, "reward_std": 0.15856975317001343, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2767.0, "completions/mean_length": 1137.3817138671875, "completions/mean_terminated_length": 617.0997314453125, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 11.671720116618076, "grad_norm": 0.16311952471733093, "learning_rate": 1e-06, "loss": -0.0612, "num_tokens": 730827473.0, "reward": 0.6774553656578064, "reward_std": 0.14995764195919037, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 1249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1473214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3126.0, "completions/mean_length": 1131.6351318359375, "completions/mean_terminated_length": 619.46728515625, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 11.681049562682215, "grad_norm": 0.17689785361289978, "learning_rate": 1e-06, "loss": -0.0791, "num_tokens": 731392418.0, "reward": 0.6439732313156128, "reward_std": 0.15233227610588074, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 1250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3951.0, "completions/mean_length": 1203.5703125, "completions/mean_terminated_length": 667.9351806640625, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 11.690379008746355, "grad_norm": 0.18204478919506073, "learning_rate": 1e-06, "loss": -0.0769, "num_tokens": 731989369.0, "reward": 0.6026785969734192, "reward_std": 0.1702541708946228, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 1251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3308.0, "completions/mean_length": 1070.3382568359375, "completions/mean_terminated_length": 620.3679809570312, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 11.699708454810496, "grad_norm": 0.14226645231246948, "learning_rate": 1e-06, "loss": -0.0557, "num_tokens": 732569696.0, "reward": 0.6819196939468384, "reward_std": 0.11768680810928345, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 1252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1640625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3946.0, "completions/mean_length": 1205.2879638671875, "completions/mean_terminated_length": 637.9519653320312, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 11.709037900874636, "grad_norm": 0.12835073471069336, "learning_rate": 1e-06, "loss": -0.1102, "num_tokens": 733134450.0, "reward": 0.6964285969734192, "reward_std": 0.11629742383956909, "rewards/verify_math_reward/mean": 0.6964285969734192, "rewards/verify_math_reward/std": 0.4600566029548645, "step": 1253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1517857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3644.0, "completions/mean_length": 1139.524658203125, "completions/mean_terminated_length": 610.4710693359375, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 11.718367346938775, "grad_norm": 0.1552983522415161, "learning_rate": 1e-06, "loss": -0.0867, "num_tokens": 733688000.0, "reward": 0.6651785969734192, "reward_std": 0.13516110181808472, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219160199165344, "step": 1254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3595.0, "completions/mean_length": 1179.3951416015625, "completions/mean_terminated_length": 639.2830200195312, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 11.727696793002915, "grad_norm": 0.18033091723918915, "learning_rate": 1e-06, "loss": -0.0972, "num_tokens": 734266410.0, "reward": 0.676339328289032, "reward_std": 0.16638068854808807, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 1255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3076.0, "completions/mean_length": 1009.32373046875, "completions/mean_terminated_length": 604.0025024414062, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 11.737026239067056, "grad_norm": 0.17618383467197418, "learning_rate": 1e-06, "loss": -0.0647, "num_tokens": 734836812.0, "reward": 0.7087053656578064, "reward_std": 0.13726656138896942, "rewards/verify_math_reward/mean": 0.7087053656578064, "rewards/verify_math_reward/std": 0.45461276173591614, "step": 1256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3984.0, "completions/mean_length": 1138.4609375, "completions/mean_terminated_length": 663.415771484375, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 11.746355685131196, "grad_norm": 0.14334562420845032, "learning_rate": 1e-06, "loss": -0.0623, "num_tokens": 735439817.0, "reward": 0.6439732313156128, "reward_std": 0.12181992828845978, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 1257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1696428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3802.0, "completions/mean_length": 1200.634033203125, "completions/mean_terminated_length": 609.1075439453125, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 11.755685131195335, "grad_norm": 0.19604165852069855, "learning_rate": 1e-06, "loss": -0.083, "num_tokens": 735987929.0, "reward": 0.609375, "reward_std": 0.16491642594337463, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 1258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3705.0, "completions/mean_length": 1178.26123046875, "completions/mean_terminated_length": 633.3589477539062, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 11.765014577259475, "grad_norm": 0.1749848872423172, "learning_rate": 1e-06, "loss": -0.0935, "num_tokens": 736557547.0, "reward": 0.6361607313156128, "reward_std": 0.1557818502187729, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 1259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3408.0, "completions/mean_length": 980.2098388671875, "completions/mean_terminated_length": 571.065673828125, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 11.774344023323614, "grad_norm": 0.14527100324630737, "learning_rate": 1e-06, "loss": -0.0472, "num_tokens": 737094071.0, "reward": 0.7031250596046448, "reward_std": 0.10344410687685013, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 1260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3577.0, "completions/mean_length": 965.427490234375, "completions/mean_terminated_length": 527.306640625, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 11.783673469387756, "grad_norm": 0.18137522041797638, "learning_rate": 1e-06, "loss": -0.1077, "num_tokens": 737591590.0, "reward": 0.7031250596046448, "reward_std": 0.16675932705402374, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 1261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4006.0, "completions/mean_length": 1069.5592041015625, "completions/mean_terminated_length": 560.5488891601562, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 11.793002915451895, "grad_norm": 0.17344211041927338, "learning_rate": 1e-06, "loss": -0.0853, "num_tokens": 738107835.0, "reward": 0.7165178656578064, "reward_std": 0.13185282051563263, "rewards/verify_math_reward/mean": 0.7165178656578064, "rewards/verify_math_reward/std": 0.4509401023387909, "step": 1262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1629464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3313.0, "completions/mean_length": 1221.5926513671875, "completions/mean_terminated_length": 662.0413208007812, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 11.802332361516035, "grad_norm": 0.13384099304676056, "learning_rate": 1e-06, "loss": -0.102, "num_tokens": 738690086.0, "reward": 0.668526828289032, "reward_std": 0.12971526384353638, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 1263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1395089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3739.0, "completions/mean_length": 1090.3013916015625, "completions/mean_terminated_length": 602.99609375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 11.811661807580174, "grad_norm": 0.16794444620609283, "learning_rate": 1e-06, "loss": -0.1056, "num_tokens": 739242076.0, "reward": 0.7299107313156128, "reward_std": 0.14489160478115082, "rewards/verify_math_reward/mean": 0.7299107313156128, "rewards/verify_math_reward/std": 0.44425368309020996, "step": 1264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1785714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3056.0, "completions/mean_length": 1239.03125, "completions/mean_terminated_length": 617.9511108398438, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 11.820991253644316, "grad_norm": 0.16162048280239105, "learning_rate": 1e-06, "loss": -0.1131, "num_tokens": 739785552.0, "reward": 0.6462053656578064, "reward_std": 0.14728990197181702, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 1265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3972.0, "completions/mean_length": 1130.3013916015625, "completions/mean_terminated_length": 667.2697143554688, "completions/min_length": 193.0, "completions/min_terminated_length": 193.0, "epoch": 11.830320699708455, "grad_norm": 0.1715167611837387, "learning_rate": 1e-06, "loss": -0.0913, "num_tokens": 740385726.0, "reward": 0.6238839626312256, "reward_std": 0.15623538196086884, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.48468026518821716, "step": 1266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3002.0, "completions/mean_length": 1083.3270263671875, "completions/mean_terminated_length": 608.4612426757812, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 11.839650145772595, "grad_norm": 0.14971394836902618, "learning_rate": 1e-06, "loss": -0.0644, "num_tokens": 740949067.0, "reward": 0.6808035969734192, "reward_std": 0.10382387042045593, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.46642565727233887, "step": 1267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2904.0, "completions/mean_length": 1000.9598388671875, "completions/mean_terminated_length": 590.1138305664062, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 11.848979591836734, "grad_norm": 0.1726934313774109, "learning_rate": 1e-06, "loss": -0.0681, "num_tokens": 741518895.0, "reward": 0.6618303656578064, "reward_std": 0.14713652431964874, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 1268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2939.0, "completions/mean_length": 980.0535888671875, "completions/mean_terminated_length": 575.3341674804688, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 11.858309037900874, "grad_norm": 0.1491033136844635, "learning_rate": 1e-06, "loss": -0.0598, "num_tokens": 742063103.0, "reward": 0.6796875596046448, "reward_std": 0.14432819187641144, "rewards/verify_math_reward/mean": 0.6796875, "rewards/verify_math_reward/std": 0.4668572247028351, "step": 1269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3839.0, "completions/mean_length": 1025.4788818359375, "completions/mean_terminated_length": 617.8875122070312, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 11.867638483965015, "grad_norm": 0.1738765686750412, "learning_rate": 1e-06, "loss": -0.068, "num_tokens": 742643388.0, "reward": 0.6618303656578064, "reward_std": 0.16352704167366028, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 1270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2302.0, "completions/mean_length": 1015.8248291015625, "completions/mean_terminated_length": 566.7966918945312, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 11.876967930029155, "grad_norm": 0.18321920931339264, "learning_rate": 1e-06, "loss": -0.0846, "num_tokens": 743183023.0, "reward": 0.691964328289032, "reward_std": 0.15349414944648743, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 1271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3067.0, "completions/mean_length": 1014.185302734375, "completions/mean_terminated_length": 622.6591186523438, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 11.886297376093294, "grad_norm": 0.16701650619506836, "learning_rate": 1e-06, "loss": -0.0693, "num_tokens": 743773133.0, "reward": 0.6629464626312256, "reward_std": 0.16330133378505707, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 1272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1707589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 1210.6116943359375, "completions/mean_terminated_length": 616.4468383789062, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 11.895626822157434, "grad_norm": 0.1822567880153656, "learning_rate": 1e-06, "loss": -0.0881, "num_tokens": 744319833.0, "reward": 0.6495535969734192, "reward_std": 0.143612802028656, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 1273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3199.0, "completions/mean_length": 1136.5703125, "completions/mean_terminated_length": 647.8218383789062, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 11.904956268221575, "grad_norm": 0.22753936052322388, "learning_rate": 1e-06, "loss": -0.0717, "num_tokens": 744907336.0, "reward": 0.6383928656578064, "reward_std": 0.15785619616508484, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 1274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3380.0, "completions/mean_length": 1187.5145263671875, "completions/mean_terminated_length": 644.3403930664062, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 11.914285714285715, "grad_norm": 0.165482297539711, "learning_rate": 1e-06, "loss": -0.0867, "num_tokens": 745484989.0, "reward": 0.6629464626312256, "reward_std": 0.15518732368946075, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 1275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4050.0, "completions/mean_length": 1082.474365234375, "completions/mean_terminated_length": 607.4741821289062, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 11.923615160349854, "grad_norm": 0.17899391055107117, "learning_rate": 1e-06, "loss": -0.098, "num_tokens": 746046854.0, "reward": 0.640625, "reward_std": 0.15857228636741638, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 1276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3311.0, "completions/mean_length": 945.3449096679688, "completions/mean_terminated_length": 615.1282348632812, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 11.932944606413994, "grad_norm": 0.14702017605304718, "learning_rate": 1e-06, "loss": -0.0662, "num_tokens": 746636195.0, "reward": 0.7120535969734192, "reward_std": 0.1410936564207077, "rewards/verify_math_reward/mean": 0.7120535969734192, "rewards/verify_math_reward/std": 0.4530589282512665, "step": 1277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1484375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3058.0, "completions/mean_length": 1152.7890625, "completions/mean_terminated_length": 639.7523193359375, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 11.942274052478133, "grad_norm": 0.1499701738357544, "learning_rate": 1e-06, "loss": -0.105, "num_tokens": 747208342.0, "reward": 0.625, "reward_std": 0.13591037690639496, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1473214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3220.0, "completions/mean_length": 1110.583740234375, "completions/mean_terminated_length": 594.77880859375, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 11.951603498542275, "grad_norm": 0.17317593097686768, "learning_rate": 1e-06, "loss": -0.0545, "num_tokens": 747744369.0, "reward": 0.6674107313156128, "reward_std": 0.13726474344730377, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 1279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3289.0, "completions/mean_length": 1026.282470703125, "completions/mean_terminated_length": 560.6953735351562, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 11.960932944606414, "grad_norm": 0.17023083567619324, "learning_rate": 1e-06, "loss": -0.0866, "num_tokens": 748274374.0, "reward": 0.7220982313156128, "reward_std": 0.13827574253082275, "rewards/verify_math_reward/mean": 0.7220982313156128, "rewards/verify_math_reward/std": 0.44821488857269287, "step": 1280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2443.0, "completions/mean_length": 1021.966552734375, "completions/mean_terminated_length": 573.833740234375, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 11.970262390670554, "grad_norm": 0.18156953155994415, "learning_rate": 1e-06, "loss": -0.0975, "num_tokens": 748815280.0, "reward": 0.7220982313156128, "reward_std": 0.16209599375724792, "rewards/verify_math_reward/mean": 0.7220982313156128, "rewards/verify_math_reward/std": 0.44821488857269287, "step": 1281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3055.0, "completions/mean_length": 1062.44091796875, "completions/mean_terminated_length": 629.0752563476562, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 11.979591836734693, "grad_norm": 0.15937910974025726, "learning_rate": 1e-06, "loss": -0.0678, "num_tokens": 749402003.0, "reward": 0.6941964626312256, "reward_std": 0.14733155071735382, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.46100425720214844, "step": 1282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3242.0, "completions/mean_length": 1133.294677734375, "completions/mean_terminated_length": 657.419677734375, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 11.988921282798835, "grad_norm": 0.1433684229850769, "learning_rate": 1e-06, "loss": -0.0757, "num_tokens": 750002723.0, "reward": 0.6584821939468384, "reward_std": 0.12741659581661224, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 1283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.19034090909090906, "completions/max_length": 4096.0, "completions/max_terminated_length": 2724.0, "completions/mean_length": 1256.84375, "completions/mean_terminated_length": 589.3930053710938, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 11.998250728862974, "grad_norm": 0.17795808613300323, "learning_rate": 1e-06, "loss": -0.078, "num_tokens": 750565713.0, "reward": 0.6875000596046448, "reward_std": 0.14102061092853546, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 1284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2969.0, "completions/mean_length": 1007.1663208007812, "completions/mean_terminated_length": 561.3958740234375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 12.00932944606414, "grad_norm": 0.16885869204998016, "learning_rate": 1e-06, "loss": -0.0403, "num_tokens": 751092782.0, "reward": 0.6551339626312256, "reward_std": 0.1293700933456421, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 1285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2520.0, "completions/mean_length": 1026.368408203125, "completions/mean_terminated_length": 574.3739013671875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 12.018658892128279, "grad_norm": 0.17770910263061523, "learning_rate": 1e-06, "loss": -0.0745, "num_tokens": 751629448.0, "reward": 0.699776828289032, "reward_std": 0.1338074505329132, "rewards/verify_math_reward/mean": 0.6997767686843872, "rewards/verify_math_reward/std": 0.4586109220981598, "step": 1286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3398.0, "completions/mean_length": 974.47216796875, "completions/mean_terminated_length": 577.900634765625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 12.02798833819242, "grad_norm": 0.1872461885213852, "learning_rate": 1e-06, "loss": -0.045, "num_tokens": 752180247.0, "reward": 0.7321428656578064, "reward_std": 0.1294114738702774, "rewards/verify_math_reward/mean": 0.7321428656578064, "rewards/verify_math_reward/std": 0.4430900514125824, "step": 1287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3137.0, "completions/mean_length": 950.8225708007812, "completions/mean_terminated_length": 608.2784423828125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 12.03731778425656, "grad_norm": 0.1401851773262024, "learning_rate": 1e-06, "loss": -0.0535, "num_tokens": 752760056.0, "reward": 0.7332589626312256, "reward_std": 0.13455308973789215, "rewards/verify_math_reward/mean": 0.7332589030265808, "rewards/verify_math_reward/std": 0.4425028860569, "step": 1288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3172.0, "completions/mean_length": 966.3928833007812, "completions/mean_terminated_length": 573.2261352539062, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 12.0466472303207, "grad_norm": 0.15536810457706451, "learning_rate": 1e-06, "loss": -0.064, "num_tokens": 753294280.0, "reward": 0.7477678656578064, "reward_std": 0.1104736328125, "rewards/verify_math_reward/mean": 0.7477678656578064, "rewards/verify_math_reward/std": 0.4345363676548004, "step": 1289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3675.0, "completions/mean_length": 1040.2132568359375, "completions/mean_terminated_length": 612.5585327148438, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 12.055976676384839, "grad_norm": 0.1755959689617157, "learning_rate": 1e-06, "loss": -0.0522, "num_tokens": 753862343.0, "reward": 0.7020089626312256, "reward_std": 0.14789748191833496, "rewards/verify_math_reward/mean": 0.7020089030265808, "rewards/verify_math_reward/std": 0.45763099193573, "step": 1290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3198.0, "completions/mean_length": 1001.052490234375, "completions/mean_terminated_length": 599.060546875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 12.06530612244898, "grad_norm": 0.1526235193014145, "learning_rate": 1e-06, "loss": -0.0484, "num_tokens": 754428006.0, "reward": 0.6573660969734192, "reward_std": 0.14406605064868927, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485536336898804, "step": 1291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3308.0, "completions/mean_length": 1024.805908203125, "completions/mean_terminated_length": 586.0637817382812, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 12.07463556851312, "grad_norm": 0.1724972277879715, "learning_rate": 1e-06, "loss": -0.0757, "num_tokens": 754982016.0, "reward": 0.6718750596046448, "reward_std": 0.14835324883460999, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2994.0, "completions/mean_length": 1059.7020263671875, "completions/mean_terminated_length": 634.7748413085938, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 12.08396501457726, "grad_norm": 0.1610293835401535, "learning_rate": 1e-06, "loss": -0.0627, "num_tokens": 755571205.0, "reward": 0.6886160969734192, "reward_std": 0.14702913165092468, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 1293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4017.0, "completions/mean_length": 1100.349365234375, "completions/mean_terminated_length": 605.6189575195312, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 12.093294460641399, "grad_norm": 0.17970708012580872, "learning_rate": 1e-06, "loss": -0.0558, "num_tokens": 756131374.0, "reward": 0.6383928656578064, "reward_std": 0.1629229635000229, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 1294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3686.0, "completions/mean_length": 1041.83154296875, "completions/mean_terminated_length": 627.6412963867188, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 12.102623906705539, "grad_norm": 0.16404816508293152, "learning_rate": 1e-06, "loss": -0.0829, "num_tokens": 756719287.0, "reward": 0.6796875596046448, "reward_std": 0.1419203132390976, "rewards/verify_math_reward/mean": 0.6796875, "rewards/verify_math_reward/std": 0.4668572247028351, "step": 1295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2505.0, "completions/mean_length": 977.099365234375, "completions/mean_terminated_length": 576.4345092773438, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 12.11195335276968, "grad_norm": 0.15405990183353424, "learning_rate": 1e-06, "loss": -0.0458, "num_tokens": 757260056.0, "reward": 0.699776828289032, "reward_std": 0.11765359342098236, "rewards/verify_math_reward/mean": 0.6997767686843872, "rewards/verify_math_reward/std": 0.4586109220981598, "step": 1296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3015.0, "completions/mean_length": 994.7745971679688, "completions/mean_terminated_length": 626.9638061523438, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 12.12128279883382, "grad_norm": 0.16399167478084564, "learning_rate": 1e-06, "loss": -0.0757, "num_tokens": 757848358.0, "reward": 0.7120535969734192, "reward_std": 0.15793149173259735, "rewards/verify_math_reward/mean": 0.7120535969734192, "rewards/verify_math_reward/std": 0.4530589282512665, "step": 1297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3403.0, "completions/mean_length": 1122.6015625, "completions/mean_terminated_length": 649.4735107421875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 12.130612244897959, "grad_norm": 0.15027697384357452, "learning_rate": 1e-06, "loss": -0.0641, "num_tokens": 758443937.0, "reward": 0.6495535969734192, "reward_std": 0.13842841982841492, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 1298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3964.0, "completions/mean_length": 935.35498046875, "completions/mean_terminated_length": 591.126220703125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 12.139941690962099, "grad_norm": 0.15242789685726166, "learning_rate": 1e-06, "loss": -0.0292, "num_tokens": 759006887.0, "reward": 0.7287946939468384, "reward_std": 0.10716353356838226, "rewards/verify_math_reward/mean": 0.7287946343421936, "rewards/verify_math_reward/std": 0.44483017921447754, "step": 1299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3537.0, "completions/mean_length": 1186.8359375, "completions/mean_terminated_length": 643.5350952148438, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 12.14927113702624, "grad_norm": 0.17300738394260406, "learning_rate": 1e-06, "loss": -0.1027, "num_tokens": 759592748.0, "reward": 0.660714328289032, "reward_std": 0.16435259580612183, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 1300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3942.0, "completions/mean_length": 839.6484985351562, "completions/mean_terminated_length": 563.6864624023438, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 12.15860058309038, "grad_norm": 0.16081883013248444, "learning_rate": 1e-06, "loss": -0.0505, "num_tokens": 760142881.0, "reward": 0.7477678656578064, "reward_std": 0.11434461921453476, "rewards/verify_math_reward/mean": 0.7477678656578064, "rewards/verify_math_reward/std": 0.434536337852478, "step": 1301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3316.0, "completions/mean_length": 1012.5647583007812, "completions/mean_terminated_length": 633.897216796875, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 12.167930029154519, "grad_norm": 0.16975706815719604, "learning_rate": 1e-06, "loss": -0.0453, "num_tokens": 760739171.0, "reward": 0.6930803656578064, "reward_std": 0.15251773595809937, "rewards/verify_math_reward/mean": 0.6930803656578064, "rewards/verify_math_reward/std": 0.46147334575653076, "step": 1302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3561.0, "completions/mean_length": 1040.6663818359375, "completions/mean_terminated_length": 563.6400146484375, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 12.177259475218658, "grad_norm": 0.1844092309474945, "learning_rate": 1e-06, "loss": -0.0611, "num_tokens": 761258704.0, "reward": 0.7209821939468384, "reward_std": 0.14628168940544128, "rewards/verify_math_reward/mean": 0.7209821343421936, "rewards/verify_math_reward/std": 0.448766827583313, "step": 1303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2628.0, "completions/mean_length": 914.5982666015625, "completions/mean_terminated_length": 546.1419677734375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 12.186588921282798, "grad_norm": 0.1265084594488144, "learning_rate": 1e-06, "loss": -0.0502, "num_tokens": 761776936.0, "reward": 0.746651828289032, "reward_std": 0.0867268368601799, "rewards/verify_math_reward/mean": 0.7466517686843872, "rewards/verify_math_reward/std": 0.435171514749527, "step": 1304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2983.0, "completions/mean_length": 1044.993408203125, "completions/mean_terminated_length": 618.0076293945312, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 12.19591836734694, "grad_norm": 0.13551369309425354, "learning_rate": 1e-06, "loss": -0.0642, "num_tokens": 762353386.0, "reward": 0.707589328289032, "reward_std": 0.11152489483356476, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 1305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3493.0, "completions/mean_length": 899.0469360351562, "completions/mean_terminated_length": 602.743896484375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 12.205247813411079, "grad_norm": 0.16129587590694427, "learning_rate": 1e-06, "loss": -0.0273, "num_tokens": 762934004.0, "reward": 0.6908482313156128, "reward_std": 0.13745088875293732, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 1306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1607142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3596.0, "completions/mean_length": 1171.62060546875, "completions/mean_terminated_length": 611.6329345703125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 12.214577259475218, "grad_norm": 0.13189013302326202, "learning_rate": 1e-06, "loss": -0.0717, "num_tokens": 763482600.0, "reward": 0.6908482313156128, "reward_std": 0.11208830773830414, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 1307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1618303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3406.0, "completions/mean_length": 1209.1373291015625, "completions/mean_terminated_length": 651.753662109375, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 12.223906705539358, "grad_norm": 0.1550699770450592, "learning_rate": 1e-06, "loss": -0.0749, "num_tokens": 764057955.0, "reward": 0.6473214626312256, "reward_std": 0.1360626220703125, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 1308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1662946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3482.0, "completions/mean_length": 1193.4921875, "completions/mean_terminated_length": 614.5448608398438, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 12.2332361516035, "grad_norm": 0.16716431081295013, "learning_rate": 1e-06, "loss": -0.0596, "num_tokens": 764603828.0, "reward": 0.637276828289032, "reward_std": 0.13778719305992126, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 1309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3157.0, "completions/mean_length": 1084.1395263671875, "completions/mean_terminated_length": 622.8635864257812, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 12.242565597667639, "grad_norm": 0.13719680905342102, "learning_rate": 1e-06, "loss": -0.0996, "num_tokens": 765175065.0, "reward": 0.7042410969734192, "reward_std": 0.1506737321615219, "rewards/verify_math_reward/mean": 0.7042410969734192, "rewards/verify_math_reward/std": 0.45663803815841675, "step": 1310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2353.0, "completions/mean_length": 890.0413208007812, "completions/mean_terminated_length": 545.2719116210938, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 12.251895043731778, "grad_norm": 0.16699302196502686, "learning_rate": 1e-06, "loss": -0.0749, "num_tokens": 765696534.0, "reward": 0.7265625596046448, "reward_std": 0.1439163088798523, "rewards/verify_math_reward/mean": 0.7265625, "rewards/verify_math_reward/std": 0.4459724426269531, "step": 1311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3246.0, "completions/mean_length": 1121.9576416015625, "completions/mean_terminated_length": 626.2838745117188, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 12.261224489795918, "grad_norm": 0.15327630937099457, "learning_rate": 1e-06, "loss": -0.0915, "num_tokens": 766269864.0, "reward": 0.645089328289032, "reward_std": 0.14623567461967468, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 1312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2790.0, "completions/mean_length": 1061.8404541015625, "completions/mean_terminated_length": 637.2124633789062, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 12.270553935860057, "grad_norm": 0.16404862701892853, "learning_rate": 1e-06, "loss": -0.0649, "num_tokens": 766856337.0, "reward": 0.6752232313156128, "reward_std": 0.14661727845668793, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 1313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3308.0, "completions/mean_length": 991.9464721679688, "completions/mean_terminated_length": 588.771728515625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 12.279883381924199, "grad_norm": 0.16358880698680878, "learning_rate": 1e-06, "loss": -0.0804, "num_tokens": 767414265.0, "reward": 0.707589328289032, "reward_std": 0.1519557535648346, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 1314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4017.0, "completions/mean_length": 910.4654541015625, "completions/mean_terminated_length": 545.9514770507812, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 12.289212827988338, "grad_norm": 0.16791094839572906, "learning_rate": 1e-06, "loss": -0.0533, "num_tokens": 767933426.0, "reward": 0.7276785969734192, "reward_std": 0.126667320728302, "rewards/verify_math_reward/mean": 0.7276785969734192, "rewards/verify_math_reward/std": 0.4454030692577362, "step": 1315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3946.0, "completions/mean_length": 1009.638427734375, "completions/mean_terminated_length": 595.5189819335938, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 12.298542274052478, "grad_norm": 0.14293956756591797, "learning_rate": 1e-06, "loss": -0.0599, "num_tokens": 768496558.0, "reward": 0.6796875596046448, "reward_std": 0.11892352253198624, "rewards/verify_math_reward/mean": 0.6796875, "rewards/verify_math_reward/std": 0.4668572247028351, "step": 1316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3452.0, "completions/mean_length": 940.8917846679688, "completions/mean_terminated_length": 618.7835083007812, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 12.307871720116617, "grad_norm": 0.1451500505208969, "learning_rate": 1e-06, "loss": -0.0699, "num_tokens": 769086773.0, "reward": 0.7142857313156128, "reward_std": 0.15123826265335083, "rewards/verify_math_reward/mean": 0.7142857313156128, "rewards/verify_math_reward/std": 0.4520062506198883, "step": 1317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1607142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3710.0, "completions/mean_length": 1201.2879638671875, "completions/mean_terminated_length": 646.9813842773438, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 12.317201166180759, "grad_norm": 0.18425531685352325, "learning_rate": 1e-06, "loss": -0.0944, "num_tokens": 769672855.0, "reward": 0.6517857313156128, "reward_std": 0.16149938106536865, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 1318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3025.0, "completions/mean_length": 937.6730346679688, "completions/mean_terminated_length": 585.0062255859375, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 12.326530612244898, "grad_norm": 0.1543116569519043, "learning_rate": 1e-06, "loss": -0.0459, "num_tokens": 770227658.0, "reward": 0.7366071939468384, "reward_std": 0.1331227421760559, "rewards/verify_math_reward/mean": 0.7366071343421936, "rewards/verify_math_reward/std": 0.44071969389915466, "step": 1319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3912.0, "completions/mean_length": 1100.864990234375, "completions/mean_terminated_length": 659.8399658203125, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 12.335860058309038, "grad_norm": 0.11531640589237213, "learning_rate": 1e-06, "loss": -0.0922, "num_tokens": 770829393.0, "reward": 0.6439732313156128, "reward_std": 0.11388886719942093, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 1320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1796875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3950.0, "completions/mean_length": 1277.0101318359375, "completions/mean_terminated_length": 659.5170288085938, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 12.345189504373177, "grad_norm": 0.181630477309227, "learning_rate": 1e-06, "loss": -0.0907, "num_tokens": 771398762.0, "reward": 0.6227678656578064, "reward_std": 0.14327509701251984, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644601345062, "step": 1321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1607142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3082.0, "completions/mean_length": 1147.716552734375, "completions/mean_terminated_length": 583.1515502929688, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 12.354518950437317, "grad_norm": 0.1867125928401947, "learning_rate": 1e-06, "loss": -0.0749, "num_tokens": 771924100.0, "reward": 0.6584821939468384, "reward_std": 0.1230238527059555, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 1322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1551339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2390.0, "completions/mean_length": 1138.462158203125, "completions/mean_terminated_length": 595.4002685546875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 12.363848396501458, "grad_norm": 0.14314530789852142, "learning_rate": 1e-06, "loss": -0.0666, "num_tokens": 772462098.0, "reward": 0.6662946939468384, "reward_std": 0.1176842749118805, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 1323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3459.0, "completions/mean_length": 1088.2879638671875, "completions/mean_terminated_length": 680.39794921875, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 12.373177842565598, "grad_norm": 0.17088662087917328, "learning_rate": 1e-06, "loss": -0.0926, "num_tokens": 773095028.0, "reward": 0.6272321939468384, "reward_std": 0.1732172816991806, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 1324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3560.0, "completions/mean_length": 1295.83154296875, "completions/mean_terminated_length": 649.6387329101562, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 12.382507288629737, "grad_norm": 0.1721210479736328, "learning_rate": 1e-06, "loss": -0.1125, "num_tokens": 773653757.0, "reward": 0.6283482313156128, "reward_std": 0.13654935359954834, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 1325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1863839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3510.0, "completions/mean_length": 1335.341552734375, "completions/mean_terminated_length": 702.9273071289062, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 12.391836734693877, "grad_norm": 0.1628490537405014, "learning_rate": 1e-06, "loss": -0.087, "num_tokens": 774259151.0, "reward": 0.5870535969734192, "reward_std": 0.1614226996898651, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 1326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3174.0, "completions/mean_length": 1026.2723388671875, "completions/mean_terminated_length": 583.2592163085938, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 12.401166180758018, "grad_norm": 0.16476882994174957, "learning_rate": 1e-06, "loss": -0.0607, "num_tokens": 774794467.0, "reward": 0.6796875596046448, "reward_std": 0.13711318373680115, "rewards/verify_math_reward/mean": 0.6796875, "rewards/verify_math_reward/std": 0.4668572247028351, "step": 1327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3819.0, "completions/mean_length": 1035.35498046875, "completions/mean_terminated_length": 620.2864379882812, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 12.410495626822158, "grad_norm": 0.17252616584300995, "learning_rate": 1e-06, "loss": -0.0739, "num_tokens": 775373065.0, "reward": 0.7209821939468384, "reward_std": 0.1640915721654892, "rewards/verify_math_reward/mean": 0.7209821343421936, "rewards/verify_math_reward/std": 0.448766827583313, "step": 1328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3075.0, "completions/mean_length": 994.8449096679688, "completions/mean_terminated_length": 578.7405395507812, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 12.419825072886297, "grad_norm": 0.15286733210086823, "learning_rate": 1e-06, "loss": -0.0523, "num_tokens": 775920454.0, "reward": 0.707589328289032, "reward_std": 0.12287301570177078, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512405037879944, "step": 1329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2681.0, "completions/mean_length": 1097.44873046875, "completions/mean_terminated_length": 624.8087768554688, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 12.429154518950437, "grad_norm": 0.16014844179153442, "learning_rate": 1e-06, "loss": -0.0933, "num_tokens": 776505160.0, "reward": 0.6506696939468384, "reward_std": 0.14774663746356964, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 1330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3433.0, "completions/mean_length": 1037.69873046875, "completions/mean_terminated_length": 605.2509765625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 12.438483965014576, "grad_norm": 0.179546520113945, "learning_rate": 1e-06, "loss": -0.0963, "num_tokens": 777067834.0, "reward": 0.6964285969734192, "reward_std": 0.1568765640258789, "rewards/verify_math_reward/mean": 0.6964285969734192, "rewards/verify_math_reward/std": 0.4600566029548645, "step": 1331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4069.0, "completions/mean_length": 865.763427734375, "completions/mean_terminated_length": 574.9635009765625, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 12.447813411078718, "grad_norm": 0.1690395474433899, "learning_rate": 1e-06, "loss": -0.039, "num_tokens": 777626478.0, "reward": 0.7578125596046448, "reward_std": 0.14909958839416504, "rewards/verify_math_reward/mean": 0.7578125, "rewards/verify_math_reward/std": 0.428646445274353, "step": 1332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1607142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3603.0, "completions/mean_length": 1169.364990234375, "completions/mean_terminated_length": 608.9454345703125, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 12.457142857142857, "grad_norm": 0.16161946952342987, "learning_rate": 1e-06, "loss": -0.0618, "num_tokens": 778185405.0, "reward": 0.6439732313156128, "reward_std": 0.12831632792949677, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 1333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3180.0, "completions/mean_length": 1130.149658203125, "completions/mean_terminated_length": 626.8068237304688, "completions/min_length": 203.0, "completions/min_terminated_length": 203.0, "epoch": 12.466472303206997, "grad_norm": 0.20989133417606354, "learning_rate": 1e-06, "loss": -0.0848, "num_tokens": 778757603.0, "reward": 0.6808035969734192, "reward_std": 0.1842365711927414, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.4664256274700165, "step": 1334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4027.0, "completions/mean_length": 845.6183471679688, "completions/mean_terminated_length": 582.9215698242188, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 12.475801749271136, "grad_norm": 0.13798098266124725, "learning_rate": 1e-06, "loss": -0.0558, "num_tokens": 779321461.0, "reward": 0.7566964626312256, "reward_std": 0.12467243522405624, "rewards/verify_math_reward/mean": 0.7566964030265808, "rewards/verify_math_reward/std": 0.4293164908885956, "step": 1335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3313.0, "completions/mean_length": 989.76123046875, "completions/mean_terminated_length": 603.9171752929688, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 12.485131195335278, "grad_norm": 0.1599721610546112, "learning_rate": 1e-06, "loss": -0.0569, "num_tokens": 779897551.0, "reward": 0.707589328289032, "reward_std": 0.15213866531848907, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 1336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4001.0, "completions/mean_length": 992.7935791015625, "completions/mean_terminated_length": 594.1448364257812, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 12.494460641399417, "grad_norm": 0.15016426146030426, "learning_rate": 1e-06, "loss": -0.0592, "num_tokens": 780461654.0, "reward": 0.7220982313156128, "reward_std": 0.12772038578987122, "rewards/verify_math_reward/mean": 0.7220982313156128, "rewards/verify_math_reward/std": 0.44821488857269287, "step": 1337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3224.0, "completions/mean_length": 985.8348388671875, "completions/mean_terminated_length": 629.9452514648438, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 12.503790087463557, "grad_norm": 0.1614190638065338, "learning_rate": 1e-06, "loss": -0.0595, "num_tokens": 781057906.0, "reward": 0.6707589626312256, "reward_std": 0.15349344909191132, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 1338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3702.0, "completions/mean_length": 1130.333740234375, "completions/mean_terminated_length": 662.8772583007812, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 12.513119533527696, "grad_norm": 0.1414966583251953, "learning_rate": 1e-06, "loss": -0.066, "num_tokens": 781671941.0, "reward": 0.6517857313156128, "reward_std": 0.12843577563762665, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 1339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3799.0, "completions/mean_length": 1101.6138916015625, "completions/mean_terminated_length": 669.4738159179688, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 12.522448979591836, "grad_norm": 0.15916673839092255, "learning_rate": 1e-06, "loss": -0.0725, "num_tokens": 782286107.0, "reward": 0.6908482313156128, "reward_std": 0.16003471612930298, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 1340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3153.0, "completions/mean_length": 1056.875, "completions/mean_terminated_length": 666.4584350585938, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 12.531778425655977, "grad_norm": 0.15327374637126923, "learning_rate": 1e-06, "loss": -0.0727, "num_tokens": 782898571.0, "reward": 0.691964328289032, "reward_std": 0.13132219016551971, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 1341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3555.0, "completions/mean_length": 1058.51123046875, "completions/mean_terminated_length": 655.3046875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 12.541107871720117, "grad_norm": 0.1456425040960312, "learning_rate": 1e-06, "loss": -0.0339, "num_tokens": 783505549.0, "reward": 0.6785714626312256, "reward_std": 0.10836746543645859, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 1342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4049.0, "completions/mean_length": 1128.040283203125, "completions/mean_terminated_length": 628.86572265625, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 12.550437317784256, "grad_norm": 0.17717291414737701, "learning_rate": 1e-06, "loss": -0.12, "num_tokens": 784070337.0, "reward": 0.6662946939468384, "reward_std": 0.20249111950397491, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179922461509705, "step": 1343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2402.0, "completions/mean_length": 934.82373046875, "completions/mean_terminated_length": 581.8386840820312, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 12.559766763848396, "grad_norm": 0.14286023378372192, "learning_rate": 1e-06, "loss": -0.0509, "num_tokens": 784619763.0, "reward": 0.7299107313156128, "reward_std": 0.12043054401874542, "rewards/verify_math_reward/mean": 0.7299107313156128, "rewards/verify_math_reward/std": 0.44425368309020996, "step": 1344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1395089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3826.0, "completions/mean_length": 1145.3035888671875, "completions/mean_terminated_length": 666.9157104492188, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 12.569096209912537, "grad_norm": 0.19437940418720245, "learning_rate": 1e-06, "loss": -0.0767, "num_tokens": 785227363.0, "reward": 0.6328125, "reward_std": 0.14864563941955566, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 1345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4016.0, "completions/mean_length": 869.9185791015625, "completions/mean_terminated_length": 579.49267578125, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 12.578425655976677, "grad_norm": 0.1386461853981018, "learning_rate": 1e-06, "loss": -0.0496, "num_tokens": 785798018.0, "reward": 0.7578125596046448, "reward_std": 0.1179899051785469, "rewards/verify_math_reward/mean": 0.7578125, "rewards/verify_math_reward/std": 0.428646445274353, "step": 1346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 982.2366333007812, "completions/mean_terminated_length": 612.9388427734375, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 12.587755102040816, "grad_norm": 0.14988870918750763, "learning_rate": 1e-06, "loss": -0.0518, "num_tokens": 786379310.0, "reward": 0.7031250596046448, "reward_std": 0.1299756020307541, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 1347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3508.0, "completions/mean_length": 971.935302734375, "completions/mean_terminated_length": 601.4157104492188, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 12.597084548104956, "grad_norm": 0.15420491993427277, "learning_rate": 1e-06, "loss": -0.0538, "num_tokens": 786949004.0, "reward": 0.7209821939468384, "reward_std": 0.11629742383956909, "rewards/verify_math_reward/mean": 0.7209821343421936, "rewards/verify_math_reward/std": 0.448766827583313, "step": 1348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3264.0, "completions/mean_length": 1083.8348388671875, "completions/mean_terminated_length": 649.1289672851562, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 12.606413994169095, "grad_norm": 0.16972795128822327, "learning_rate": 1e-06, "loss": -0.0685, "num_tokens": 787555400.0, "reward": 0.6361607313156128, "reward_std": 0.16476556658744812, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 1349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3121.0, "completions/mean_length": 999.50341796875, "completions/mean_terminated_length": 566.1514282226562, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 12.615743440233237, "grad_norm": 0.1987408697605133, "learning_rate": 1e-06, "loss": -0.0782, "num_tokens": 788093763.0, "reward": 0.6808035969734192, "reward_std": 0.14650921523571014, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.4664256274700165, "step": 1350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2623.0, "completions/mean_length": 1067.04248046875, "completions/mean_terminated_length": 634.3341674804688, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 12.625072886297376, "grad_norm": 0.16759316623210907, "learning_rate": 1e-06, "loss": -0.06, "num_tokens": 788680577.0, "reward": 0.625, "reward_std": 0.15161804854869843, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3229.0, "completions/mean_length": 1084.805908203125, "completions/mean_terminated_length": 605.6636352539062, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 12.634402332361516, "grad_norm": 0.15915155410766602, "learning_rate": 1e-06, "loss": -0.0656, "num_tokens": 789236667.0, "reward": 0.6283482313156128, "reward_std": 0.12839441001415253, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159182548523, "step": 1352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1540178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3880.0, "completions/mean_length": 1142.34716796875, "completions/mean_terminated_length": 604.61083984375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 12.643731778425655, "grad_norm": 0.16895677149295807, "learning_rate": 1e-06, "loss": -0.0636, "num_tokens": 789788994.0, "reward": 0.691964328289032, "reward_std": 0.11675135791301727, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 1353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3380.0, "completions/mean_length": 841.7656860351562, "completions/mean_terminated_length": 518.338623046875, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 12.653061224489797, "grad_norm": 0.14239118993282318, "learning_rate": 1e-06, "loss": -0.0478, "num_tokens": 790288776.0, "reward": 0.7433035969734192, "reward_std": 0.11288176476955414, "rewards/verify_math_reward/mean": 0.7433035969734192, "rewards/verify_math_reward/std": 0.43705445528030396, "step": 1354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3616.0, "completions/mean_length": 980.6116333007812, "completions/mean_terminated_length": 624.1243896484375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 12.662390670553936, "grad_norm": 0.18198366463184357, "learning_rate": 1e-06, "loss": -0.0801, "num_tokens": 790883732.0, "reward": 0.691964328289032, "reward_std": 0.1643179953098297, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 1355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1584821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3823.0, "completions/mean_length": 1191.4342041015625, "completions/mean_terminated_length": 644.42041015625, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 12.671720116618076, "grad_norm": 0.15740332007408142, "learning_rate": 1e-06, "loss": -0.0993, "num_tokens": 791471297.0, "reward": 0.6395089626312256, "reward_std": 0.1413985788822174, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 1356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1584821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4054.0, "completions/mean_length": 1186.83935546875, "completions/mean_terminated_length": 638.960205078125, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 12.681049562682215, "grad_norm": 0.1690467894077301, "learning_rate": 1e-06, "loss": -0.0744, "num_tokens": 792039857.0, "reward": 0.6462053656578064, "reward_std": 0.1469959318637848, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 1357 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3684.0, "completions/mean_length": 931.4576416015625, "completions/mean_terminated_length": 556.1373291015625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 12.690379008746355, "grad_norm": 0.15457387268543243, "learning_rate": 1e-06, "loss": -0.0274, "num_tokens": 792571555.0, "reward": 0.7455357313156128, "reward_std": 0.10581875592470169, "rewards/verify_math_reward/mean": 0.7455357313156128, "rewards/verify_math_reward/std": 0.4358029067516327, "step": 1358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3765.0, "completions/mean_length": 935.700927734375, "completions/mean_terminated_length": 582.8139038085938, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 12.699708454810496, "grad_norm": 0.1606937199831009, "learning_rate": 1e-06, "loss": -0.0737, "num_tokens": 793131359.0, "reward": 0.7745535969734192, "reward_std": 0.13354459404945374, "rewards/verify_math_reward/mean": 0.7745535969734192, "rewards/verify_math_reward/std": 0.41810935735702515, "step": 1359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3880.0, "completions/mean_length": 1071.7020263671875, "completions/mean_terminated_length": 648.4542236328125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 12.709037900874636, "grad_norm": 0.15520969033241272, "learning_rate": 1e-06, "loss": -0.0459, "num_tokens": 793724324.0, "reward": 0.6495535969734192, "reward_std": 0.13064706325531006, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 1360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2891.0, "completions/mean_length": 922.3192138671875, "completions/mean_terminated_length": 602.6117553710938, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 12.718367346938775, "grad_norm": 0.17056338489055634, "learning_rate": 1e-06, "loss": -0.0416, "num_tokens": 794296146.0, "reward": 0.7008928656578064, "reward_std": 0.13241805136203766, "rewards/verify_math_reward/mean": 0.7008928656578064, "rewards/verify_math_reward/std": 0.458122581243515, "step": 1361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3659.0, "completions/mean_length": 1150.1942138671875, "completions/mean_terminated_length": 707.756103515625, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 12.727696793002915, "grad_norm": 0.1725529283285141, "learning_rate": 1e-06, "loss": -0.0432, "num_tokens": 794938752.0, "reward": 0.5814732313156128, "reward_std": 0.15011100471019745, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 1362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3533.0, "completions/mean_length": 1127.302490234375, "completions/mean_terminated_length": 641.5155639648438, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 12.737026239067056, "grad_norm": 0.1569979339838028, "learning_rate": 1e-06, "loss": -0.0809, "num_tokens": 795526039.0, "reward": 0.6395089626312256, "reward_std": 0.1566508710384369, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 1363 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3809.0, "completions/mean_length": 966.9944458007812, "completions/mean_terminated_length": 626.2116088867188, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 12.746355685131196, "grad_norm": 0.17467598617076874, "learning_rate": 1e-06, "loss": -0.0522, "num_tokens": 796133290.0, "reward": 0.6718750596046448, "reward_std": 0.17374537885189056, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3222.0, "completions/mean_length": 974.9375610351562, "completions/mean_terminated_length": 573.9949340820312, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 12.755685131195335, "grad_norm": 0.15648187696933746, "learning_rate": 1e-06, "loss": -0.0602, "num_tokens": 796669122.0, "reward": 0.7176339626312256, "reward_std": 0.12666912376880646, "rewards/verify_math_reward/mean": 0.7176339030265808, "rewards/verify_math_reward/std": 0.4504019320011139, "step": 1365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3885.0, "completions/mean_length": 1094.693115234375, "completions/mean_terminated_length": 652.75927734375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 12.765014577259475, "grad_norm": 0.1611330360174179, "learning_rate": 1e-06, "loss": -0.057, "num_tokens": 797269383.0, "reward": 0.6774553656578064, "reward_std": 0.12192729860544205, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 1366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1540178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3783.0, "completions/mean_length": 1148.703125, "completions/mean_terminated_length": 612.1240234375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 12.774344023323614, "grad_norm": 0.14559553563594818, "learning_rate": 1e-06, "loss": -0.0435, "num_tokens": 797819461.0, "reward": 0.6875000596046448, "reward_std": 0.09416642040014267, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 1367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3336.0, "completions/mean_length": 989.9141235351562, "completions/mean_terminated_length": 577.601806640625, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 12.783673469387756, "grad_norm": 0.14515119791030884, "learning_rate": 1e-06, "loss": -0.0485, "num_tokens": 798362488.0, "reward": 0.6930803656578064, "reward_std": 0.11245782673358917, "rewards/verify_math_reward/mean": 0.6930803656578064, "rewards/verify_math_reward/std": 0.46147337555885315, "step": 1368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2791.0, "completions/mean_length": 987.0123291015625, "completions/mean_terminated_length": 622.6172485351562, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 12.793002915451895, "grad_norm": 0.15400585532188416, "learning_rate": 1e-06, "loss": -0.038, "num_tokens": 798963635.0, "reward": 0.731026828289032, "reward_std": 0.1411721557378769, "rewards/verify_math_reward/mean": 0.7310267686843872, "rewards/verify_math_reward/std": 0.44367367029190063, "step": 1369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3830.0, "completions/mean_length": 1085.3248291015625, "completions/mean_terminated_length": 619.7564086914062, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 12.802332361516035, "grad_norm": 0.14347673952579498, "learning_rate": 1e-06, "loss": -0.044, "num_tokens": 799535326.0, "reward": 0.7064732313156128, "reward_std": 0.11144751310348511, "rewards/verify_math_reward/mean": 0.7064732313156128, "rewards/verify_math_reward/std": 0.4556320011615753, "step": 1370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 1125.685302734375, "completions/mean_terminated_length": 648.5880737304688, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 12.811661807580174, "grad_norm": 0.17199894785881042, "learning_rate": 1e-06, "loss": -0.0769, "num_tokens": 800139044.0, "reward": 0.6383928656578064, "reward_std": 0.1446651816368103, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 1371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4031.0, "completions/mean_length": 1044.76123046875, "completions/mean_terminated_length": 657.1195068359375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 12.820991253644316, "grad_norm": 0.1765907108783722, "learning_rate": 1e-06, "loss": -0.066, "num_tokens": 800754854.0, "reward": 0.6272321939468384, "reward_std": 0.16281278431415558, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 1372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3554.0, "completions/mean_length": 1136.5546875, "completions/mean_terminated_length": 634.2989501953125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 12.830320699708455, "grad_norm": 0.12316008657217026, "learning_rate": 1e-06, "loss": -0.039, "num_tokens": 801337439.0, "reward": 0.6752232313156128, "reward_std": 0.08090193569660187, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 1373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3276.0, "completions/mean_length": 999.1116333007812, "completions/mean_terminated_length": 583.5797729492188, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 12.839650145772595, "grad_norm": 0.16492483019828796, "learning_rate": 1e-06, "loss": -0.0626, "num_tokens": 801885483.0, "reward": 0.6897321939468384, "reward_std": 0.1338823139667511, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.4628615975379944, "step": 1374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3014.0, "completions/mean_length": 937.76123046875, "completions/mean_terminated_length": 580.7428588867188, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 12.848979591836734, "grad_norm": 0.16988950967788696, "learning_rate": 1e-06, "loss": -0.0443, "num_tokens": 802444965.0, "reward": 0.7343750596046448, "reward_std": 0.133134126663208, "rewards/verify_math_reward/mean": 0.734375, "rewards/verify_math_reward/std": 0.44191211462020874, "step": 1375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3596.0, "completions/mean_length": 1011.1506958007812, "completions/mean_terminated_length": 632.3095092773438, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 12.858309037900874, "grad_norm": 0.17253868281841278, "learning_rate": 1e-06, "loss": -0.0664, "num_tokens": 803037588.0, "reward": 0.6540178656578064, "reward_std": 0.16157494485378265, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4080.0, "completions/mean_length": 1051.7991943359375, "completions/mean_terminated_length": 669.36181640625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 12.867638483965015, "grad_norm": 0.1794116348028183, "learning_rate": 1e-06, "loss": -0.0778, "num_tokens": 803655384.0, "reward": 0.7142857313156128, "reward_std": 0.17630550265312195, "rewards/verify_math_reward/mean": 0.7142857313156128, "rewards/verify_math_reward/std": 0.4520062506198883, "step": 1377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3605.0, "completions/mean_length": 1151.548095703125, "completions/mean_terminated_length": 687.4354248046875, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 12.876967930029155, "grad_norm": 0.1660955399274826, "learning_rate": 1e-06, "loss": -0.0599, "num_tokens": 804290515.0, "reward": 0.6037946939468384, "reward_std": 0.1363229602575302, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 1378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2397.0, "completions/mean_length": 881.24560546875, "completions/mean_terminated_length": 591.8394165039062, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 12.886297376093294, "grad_norm": 0.1700073927640915, "learning_rate": 1e-06, "loss": -0.0436, "num_tokens": 804873999.0, "reward": 0.7087053656578064, "reward_std": 0.12133026868104935, "rewards/verify_math_reward/mean": 0.7087053656578064, "rewards/verify_math_reward/std": 0.45461276173591614, "step": 1379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3873.0, "completions/mean_length": 1056.1082763671875, "completions/mean_terminated_length": 643.854248046875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 12.895626822157434, "grad_norm": 0.16197142004966736, "learning_rate": 1e-06, "loss": -0.0584, "num_tokens": 805468488.0, "reward": 0.6808035969734192, "reward_std": 0.12644091248512268, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.46642565727233887, "step": 1380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3025.0, "completions/mean_length": 1116.5023193359375, "completions/mean_terminated_length": 646.8656616210938, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 12.904956268221575, "grad_norm": 0.19323408603668213, "learning_rate": 1e-06, "loss": -0.0675, "num_tokens": 806066674.0, "reward": 0.6517857313156128, "reward_std": 0.17228113114833832, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 1381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3866.0, "completions/mean_length": 858.4553833007812, "completions/mean_terminated_length": 596.796142578125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 12.914285714285715, "grad_norm": 0.16140006482601166, "learning_rate": 1e-06, "loss": -0.0609, "num_tokens": 806641962.0, "reward": 0.7209821939468384, "reward_std": 0.13527238368988037, "rewards/verify_math_reward/mean": 0.7209821343421936, "rewards/verify_math_reward/std": 0.448766827583313, "step": 1382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3261.0, "completions/mean_length": 898.3404541015625, "completions/mean_terminated_length": 563.197265625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 12.923615160349854, "grad_norm": 0.21663758158683777, "learning_rate": 1e-06, "loss": -0.0501, "num_tokens": 807196603.0, "reward": 0.7433035969734192, "reward_std": 0.15800705552101135, "rewards/verify_math_reward/mean": 0.7433035969734192, "rewards/verify_math_reward/std": 0.43705442547798157, "step": 1383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2941.0, "completions/mean_length": 1038.4107666015625, "completions/mean_terminated_length": 583.6923217773438, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 12.932944606413994, "grad_norm": 0.16240949928760529, "learning_rate": 1e-06, "loss": -0.075, "num_tokens": 807744307.0, "reward": 0.6986607313156128, "reward_std": 0.13226580619812012, "rewards/verify_math_reward/mean": 0.6986607313156128, "rewards/verify_math_reward/std": 0.4590960443019867, "step": 1384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3557.0, "completions/mean_length": 950.5335083007812, "completions/mean_terminated_length": 594.958984375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 12.942274052478133, "grad_norm": 0.16597980260849, "learning_rate": 1e-06, "loss": -0.0557, "num_tokens": 808312457.0, "reward": 0.7421875596046448, "reward_std": 0.15123896300792694, "rewards/verify_math_reward/mean": 0.7421875, "rewards/verify_math_reward/std": 0.43767455220222473, "step": 1385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4072.0, "completions/mean_length": 991.810302734375, "completions/mean_terminated_length": 640.90185546875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 12.951603498542275, "grad_norm": 0.18450036644935608, "learning_rate": 1e-06, "loss": -0.0523, "num_tokens": 808914599.0, "reward": 0.6796875596046448, "reward_std": 0.15612910687923431, "rewards/verify_math_reward/mean": 0.6796875, "rewards/verify_math_reward/std": 0.4668572247028351, "step": 1386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2970.0, "completions/mean_length": 932.1272583007812, "completions/mean_terminated_length": 543.5814208984375, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 12.960932944606414, "grad_norm": 0.17737141251564026, "learning_rate": 1e-06, "loss": -0.0453, "num_tokens": 809444961.0, "reward": 0.6852678656578064, "reward_std": 0.12140876054763794, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 1387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3306.0, "completions/mean_length": 892.8772583007812, "completions/mean_terminated_length": 587.4450073242188, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 12.970262390670554, "grad_norm": 0.14486269652843475, "learning_rate": 1e-06, "loss": -0.0197, "num_tokens": 810012707.0, "reward": 0.6718750596046448, "reward_std": 0.10554774105548859, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3823.0, "completions/mean_length": 1022.1172485351562, "completions/mean_terminated_length": 582.9910888671875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 12.979591836734693, "grad_norm": 0.16290737688541412, "learning_rate": 1e-06, "loss": -0.0611, "num_tokens": 810560988.0, "reward": 0.6785714626312256, "reward_std": 0.10772695392370224, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 1389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3630.0, "completions/mean_length": 1108.2176513671875, "completions/mean_terminated_length": 650.6293334960938, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 12.988921282798835, "grad_norm": 0.16302025318145752, "learning_rate": 1e-06, "loss": -0.0589, "num_tokens": 811155095.0, "reward": 0.6662946939468384, "reward_std": 0.13200436532497406, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 1390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11931818181818177, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 1054.977294921875, "completions/mean_terminated_length": 642.9677124023438, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 12.998250728862974, "grad_norm": 0.13653789460659027, "learning_rate": 1e-06, "loss": -0.0568, "num_tokens": 811746142.0, "reward": 0.6696428656578064, "reward_std": 0.10843275487422943, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 1391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3752.0, "completions/mean_length": 895.8449096679688, "completions/mean_terminated_length": 607.7530517578125, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 13.00932944606414, "grad_norm": 0.1684548407793045, "learning_rate": 1e-06, "loss": -0.0389, "num_tokens": 812334363.0, "reward": 0.6752232313156128, "reward_std": 0.15364569425582886, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 1392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4058.0, "completions/mean_length": 921.6116333007812, "completions/mean_terminated_length": 631.6248779296875, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 13.018658892128279, "grad_norm": 0.14841940999031067, "learning_rate": 1e-06, "loss": -0.0493, "num_tokens": 812932295.0, "reward": 0.7511160969734192, "reward_std": 0.12967318296432495, "rewards/verify_math_reward/mean": 0.7511160969734192, "rewards/verify_math_reward/std": 0.43260788917541504, "step": 1393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3050.0, "completions/mean_length": 1115.2723388671875, "completions/mean_terminated_length": 667.5892333984375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 13.02798833819242, "grad_norm": 0.16736483573913574, "learning_rate": 1e-06, "loss": -0.0389, "num_tokens": 813542875.0, "reward": 0.59375, "reward_std": 0.14169208705425262, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 1394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 935.89404296875, "completions/mean_terminated_length": 569.9041137695312, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 13.03731778425656, "grad_norm": 0.1944524496793747, "learning_rate": 1e-06, "loss": -0.0419, "num_tokens": 814088036.0, "reward": 0.6986607313156128, "reward_std": 0.12711238861083984, "rewards/verify_math_reward/mean": 0.6986607313156128, "rewards/verify_math_reward/std": 0.4590960443019867, "step": 1395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 1033.958740234375, "completions/mean_terminated_length": 596.5242309570312, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 13.0466472303207, "grad_norm": 0.15413354337215424, "learning_rate": 1e-06, "loss": -0.0942, "num_tokens": 814647751.0, "reward": 0.691964328289032, "reward_std": 0.12749329209327698, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 1396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2966.0, "completions/mean_length": 846.9766235351562, "completions/mean_terminated_length": 558.78857421875, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 13.055976676384839, "grad_norm": 0.15785396099090576, "learning_rate": 1e-06, "loss": -0.0572, "num_tokens": 815200586.0, "reward": 0.754464328289032, "reward_std": 0.1263321340084076, "rewards/verify_math_reward/mean": 0.7544642686843872, "rewards/verify_math_reward/std": 0.4306447505950928, "step": 1397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3236.0, "completions/mean_length": 838.9967041015625, "completions/mean_terminated_length": 567.2515258789062, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 13.06530612244898, "grad_norm": 0.1844933182001114, "learning_rate": 1e-06, "loss": -0.0308, "num_tokens": 815747591.0, "reward": 0.7812500596046448, "reward_std": 0.14556418359279633, "rewards/verify_math_reward/mean": 0.78125, "rewards/verify_math_reward/std": 0.41362953186035156, "step": 1398 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2729.0, "completions/mean_length": 996.3449096679688, "completions/mean_terminated_length": 602.5521850585938, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 13.07463556851312, "grad_norm": 0.14525890350341797, "learning_rate": 1e-06, "loss": -0.0599, "num_tokens": 816315044.0, "reward": 0.7142857313156128, "reward_std": 0.12583360075950623, "rewards/verify_math_reward/mean": 0.7142857313156128, "rewards/verify_math_reward/std": 0.4520062506198883, "step": 1399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3853.0, "completions/mean_length": 975.450927734375, "completions/mean_terminated_length": 614.0422973632812, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 13.08396501457726, "grad_norm": 0.1412966400384903, "learning_rate": 1e-06, "loss": -0.0654, "num_tokens": 816905056.0, "reward": 0.699776828289032, "reward_std": 0.11283759027719498, "rewards/verify_math_reward/mean": 0.6997767686843872, "rewards/verify_math_reward/std": 0.4586109220981598, "step": 1400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3544.0, "completions/mean_length": 983.53466796875, "completions/mean_terminated_length": 623.062255859375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 13.093294460641399, "grad_norm": 0.18408381938934326, "learning_rate": 1e-06, "loss": -0.0631, "num_tokens": 817487207.0, "reward": 0.691964328289032, "reward_std": 0.14045649766921997, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 1401 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3700.0, "completions/mean_length": 898.67529296875, "completions/mean_terminated_length": 606.5932006835938, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 13.102623906705539, "grad_norm": 0.16722357273101807, "learning_rate": 1e-06, "loss": -0.0437, "num_tokens": 818077444.0, "reward": 0.7332589626312256, "reward_std": 0.1280987709760666, "rewards/verify_math_reward/mean": 0.7332589030265808, "rewards/verify_math_reward/std": 0.4425028860569, "step": 1402 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3756.0, "completions/mean_length": 969.7254638671875, "completions/mean_terminated_length": 590.1902465820312, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 13.11195335276968, "grad_norm": 0.17252375185489655, "learning_rate": 1e-06, "loss": -0.0413, "num_tokens": 818644366.0, "reward": 0.6551339626312256, "reward_std": 0.1385364830493927, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900800228119, "step": 1403 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3850.0, "completions/mean_length": 1111.3773193359375, "completions/mean_terminated_length": 622.984375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 13.12128279883382, "grad_norm": 0.16588982939720154, "learning_rate": 1e-06, "loss": -0.0521, "num_tokens": 819215584.0, "reward": 0.6741071939468384, "reward_std": 0.13387978076934814, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 1404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1551339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2689.0, "completions/mean_length": 1221.2991943359375, "completions/mean_terminated_length": 693.4478149414062, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 13.130612244897959, "grad_norm": 0.14792756736278534, "learning_rate": 1e-06, "loss": -0.0558, "num_tokens": 819836444.0, "reward": 0.6026785969734192, "reward_std": 0.14838281273841858, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 1405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 996.1785888671875, "completions/mean_terminated_length": 645.7639770507812, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 13.139941690962099, "grad_norm": 0.15390346944332123, "learning_rate": 1e-06, "loss": -0.0838, "num_tokens": 820450428.0, "reward": 0.7031250596046448, "reward_std": 0.16604435443878174, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 1406 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3018.0, "completions/mean_length": 875.4486694335938, "completions/mean_terminated_length": 581.244873046875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 13.14927113702624, "grad_norm": 0.17589004337787628, "learning_rate": 1e-06, "loss": -0.0609, "num_tokens": 821008510.0, "reward": 0.7366071939468384, "reward_std": 0.14838533103466034, "rewards/verify_math_reward/mean": 0.7366071343421936, "rewards/verify_math_reward/std": 0.44071969389915466, "step": 1407 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2914.0, "completions/mean_length": 1055.90966796875, "completions/mean_terminated_length": 621.6109619140625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 13.15860058309038, "grad_norm": 0.15691091120243073, "learning_rate": 1e-06, "loss": -0.0646, "num_tokens": 821589757.0, "reward": 0.6662946939468384, "reward_std": 0.13410800695419312, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179922461509705, "step": 1408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3801.0, "completions/mean_length": 955.224365234375, "completions/mean_terminated_length": 595.8320922851562, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 13.167930029154519, "grad_norm": 0.14748361706733704, "learning_rate": 1e-06, "loss": -0.0478, "num_tokens": 822175398.0, "reward": 0.6339285969734192, "reward_std": 0.11144820600748062, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 1409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3712.0, "completions/mean_length": 907.6998291015625, "completions/mean_terminated_length": 577.8756103515625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 13.177259475218658, "grad_norm": 0.14740511775016785, "learning_rate": 1e-06, "loss": -0.0557, "num_tokens": 822738833.0, "reward": 0.7812500596046448, "reward_std": 0.11257615685462952, "rewards/verify_math_reward/mean": 0.78125, "rewards/verify_math_reward/std": 0.41362953186035156, "step": 1410 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3468.0, "completions/mean_length": 849.0692138671875, "completions/mean_terminated_length": 552.45556640625, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 13.186588921282798, "grad_norm": 0.17583271861076355, "learning_rate": 1e-06, "loss": -0.0719, "num_tokens": 823276783.0, "reward": 0.7845982313156128, "reward_std": 0.1518462747335434, "rewards/verify_math_reward/mean": 0.7845982313156128, "rewards/verify_math_reward/std": 0.4113304018974304, "step": 1411 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3816.0, "completions/mean_length": 1038.404052734375, "completions/mean_terminated_length": 628.144287109375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 13.19591836734694, "grad_norm": 0.17455369234085083, "learning_rate": 1e-06, "loss": -0.0501, "num_tokens": 823867865.0, "reward": 0.6573660969734192, "reward_std": 0.15567517280578613, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 1412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3561.0, "completions/mean_length": 933.5859985351562, "completions/mean_terminated_length": 589.1646118164062, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 13.205247813411079, "grad_norm": 0.14539392292499542, "learning_rate": 1e-06, "loss": -0.0689, "num_tokens": 824433182.0, "reward": 0.691964328289032, "reward_std": 0.12418389320373535, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 1413 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3761.0, "completions/mean_length": 1150.328125, "completions/mean_terminated_length": 604.8333129882812, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 13.214577259475218, "grad_norm": 0.17322084307670593, "learning_rate": 1e-06, "loss": -0.0611, "num_tokens": 824979132.0, "reward": 0.6517857313156128, "reward_std": 0.14079168438911438, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 1414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2682.0, "completions/mean_length": 1060.2701416015625, "completions/mean_terminated_length": 613.2676391601562, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 13.223906705539358, "grad_norm": 0.1663852334022522, "learning_rate": 1e-06, "loss": -0.0528, "num_tokens": 825554590.0, "reward": 0.6808035969734192, "reward_std": 0.14458851516246796, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.4664256274700165, "step": 1415 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3395.0, "completions/mean_length": 928.8516235351562, "completions/mean_terminated_length": 557.6396484375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 13.2332361516035, "grad_norm": 0.17487259209156036, "learning_rate": 1e-06, "loss": -0.081, "num_tokens": 826097681.0, "reward": 0.6886160969734192, "reward_std": 0.14139604568481445, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 1416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2846.0, "completions/mean_length": 908.1138916015625, "completions/mean_terminated_length": 616.89404296875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 13.242565597667639, "grad_norm": 0.16165295243263245, "learning_rate": 1e-06, "loss": -0.0461, "num_tokens": 826684391.0, "reward": 0.7053571939468384, "reward_std": 0.13380561769008636, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613664388656616, "step": 1417 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3473.0, "completions/mean_length": 953.974365234375, "completions/mean_terminated_length": 624.662109375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 13.251895043731778, "grad_norm": 0.14920338988304138, "learning_rate": 1e-06, "loss": -0.0528, "num_tokens": 827284216.0, "reward": 0.7198660969734192, "reward_std": 0.13373075425624847, "rewards/verify_math_reward/mean": 0.7198660969734192, "rewards/verify_math_reward/std": 0.44931530952453613, "step": 1418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3311.0, "completions/mean_length": 935.3761596679688, "completions/mean_terminated_length": 663.3709106445312, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 13.261224489795918, "grad_norm": 0.21821512281894684, "learning_rate": 1e-06, "loss": -0.0246, "num_tokens": 827920321.0, "reward": 0.6774553656578064, "reward_std": 0.1482323855161667, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 1419 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3519.0, "completions/mean_length": 979.3527221679688, "completions/mean_terminated_length": 556.689453125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 13.270553935860057, "grad_norm": 0.18146897852420807, "learning_rate": 1e-06, "loss": -0.0551, "num_tokens": 828454429.0, "reward": 0.7064732313156128, "reward_std": 0.1554897278547287, "rewards/verify_math_reward/mean": 0.7064732313156128, "rewards/verify_math_reward/std": 0.4556320011615753, "step": 1420 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3690.0, "completions/mean_length": 849.8895263671875, "completions/mean_terminated_length": 570.5272827148438, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 13.279883381924199, "grad_norm": 0.1580151617527008, "learning_rate": 1e-06, "loss": -0.0229, "num_tokens": 829005962.0, "reward": 0.6830357313156128, "reward_std": 0.10554774850606918, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 897.138427734375, "completions/mean_terminated_length": 630.2442626953125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 13.289212827988338, "grad_norm": 0.1608230322599411, "learning_rate": 1e-06, "loss": -0.0536, "num_tokens": 829619246.0, "reward": 0.7176339626312256, "reward_std": 0.14771202206611633, "rewards/verify_math_reward/mean": 0.7176339030265808, "rewards/verify_math_reward/std": 0.4504019320011139, "step": 1422 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3960.0, "completions/mean_length": 892.9810791015625, "completions/mean_terminated_length": 587.5587158203125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 13.298542274052478, "grad_norm": 0.1555834412574768, "learning_rate": 1e-06, "loss": -0.0328, "num_tokens": 830189197.0, "reward": 0.7087053656578064, "reward_std": 0.12569092214107513, "rewards/verify_math_reward/mean": 0.7087053656578064, "rewards/verify_math_reward/std": 0.45461276173591614, "step": 1423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3872.0, "completions/mean_length": 1030.685302734375, "completions/mean_terminated_length": 641.2553100585938, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 13.307871720116617, "grad_norm": 0.15356653928756714, "learning_rate": 1e-06, "loss": -0.0609, "num_tokens": 830792419.0, "reward": 0.6718750596046448, "reward_std": 0.12456478923559189, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3138.0, "completions/mean_length": 998.91748046875, "completions/mean_terminated_length": 627.2674560546875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 13.317201166180759, "grad_norm": 0.17770841717720032, "learning_rate": 1e-06, "loss": -0.0504, "num_tokens": 831374457.0, "reward": 0.6417410969734192, "reward_std": 0.16908234357833862, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975659370422363, "step": 1425 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3226.0, "completions/mean_length": 1007.7969360351562, "completions/mean_terminated_length": 611.0755615234375, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 13.326530612244898, "grad_norm": 0.14215829968452454, "learning_rate": 1e-06, "loss": -0.0463, "num_tokens": 831944227.0, "reward": 0.7265625596046448, "reward_std": 0.12651436030864716, "rewards/verify_math_reward/mean": 0.7265625, "rewards/verify_math_reward/std": 0.4459724426269531, "step": 1426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3535.0, "completions/mean_length": 1112.1663818359375, "completions/mean_terminated_length": 685.904296875, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 13.335860058309038, "grad_norm": 0.17089352011680603, "learning_rate": 1e-06, "loss": -0.0852, "num_tokens": 832568840.0, "reward": 0.6696428656578064, "reward_std": 0.15852628648281097, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 1427 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3207.0, "completions/mean_length": 888.7422485351562, "completions/mean_terminated_length": 637.8736572265625, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 13.345189504373177, "grad_norm": 0.17929132282733917, "learning_rate": 1e-06, "loss": -0.0526, "num_tokens": 833187193.0, "reward": 0.6953125596046448, "reward_std": 0.15654322504997253, "rewards/verify_math_reward/mean": 0.6953125, "rewards/verify_math_reward/std": 0.4605320394039154, "step": 1428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2486.0, "completions/mean_length": 929.239990234375, "completions/mean_terminated_length": 579.9938354492188, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 13.354518950437317, "grad_norm": 0.1622866839170456, "learning_rate": 1e-06, "loss": -0.0746, "num_tokens": 833737184.0, "reward": 0.7500000596046448, "reward_std": 0.1112217977643013, "rewards/verify_math_reward/mean": 0.75, "rewards/verify_math_reward/std": 0.43325456976890564, "step": 1429 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3710.0, "completions/mean_length": 965.6194458007812, "completions/mean_terminated_length": 594.3507690429688, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 13.363848396501458, "grad_norm": 0.1701904684305191, "learning_rate": 1e-06, "loss": -0.0371, "num_tokens": 834293323.0, "reward": 0.6785714626312256, "reward_std": 0.15390853583812714, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 1430 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2721.0, "completions/mean_length": 885.99560546875, "completions/mean_terminated_length": 527.5582885742188, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 13.373177842565598, "grad_norm": 0.12884975969791412, "learning_rate": 1e-06, "loss": -0.0665, "num_tokens": 834813631.0, "reward": 0.7399553656578064, "reward_std": 0.11498472094535828, "rewards/verify_math_reward/mean": 0.7399553656578064, "rewards/verify_math_reward/std": 0.43890365958213806, "step": 1431 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3674.0, "completions/mean_length": 1021.4620971679688, "completions/mean_terminated_length": 626.4962158203125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 13.382507288629737, "grad_norm": 0.1437826305627823, "learning_rate": 1e-06, "loss": -0.0391, "num_tokens": 835403077.0, "reward": 0.6618303656578064, "reward_std": 0.12726393342018127, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 1432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3291.0, "completions/mean_length": 914.6741333007812, "completions/mean_terminated_length": 572.5537719726562, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 13.391836734693877, "grad_norm": 0.17380942404270172, "learning_rate": 1e-06, "loss": -0.0476, "num_tokens": 835960745.0, "reward": 0.7198660969734192, "reward_std": 0.15161871910095215, "rewards/verify_math_reward/mean": 0.7198660969734192, "rewards/verify_math_reward/std": 0.44931530952453613, "step": 1433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3736.0, "completions/mean_length": 881.3058471679688, "completions/mean_terminated_length": 561.80859375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 13.401166180758018, "grad_norm": 0.15893049538135529, "learning_rate": 1e-06, "loss": -0.058, "num_tokens": 836510995.0, "reward": 0.738839328289032, "reward_std": 0.1144213080406189, "rewards/verify_math_reward/mean": 0.7388392686843872, "rewards/verify_math_reward/std": 0.439512699842453, "step": 1434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3607.0, "completions/mean_length": 854.0067138671875, "completions/mean_terminated_length": 596.2096557617188, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 13.410495626822158, "grad_norm": 0.14369086921215057, "learning_rate": 1e-06, "loss": -0.0541, "num_tokens": 837085057.0, "reward": 0.785714328289032, "reward_std": 0.14083515107631683, "rewards/verify_math_reward/mean": 0.7857142686843872, "rewards/verify_math_reward/std": 0.41055506467819214, "step": 1435 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3175.0, "completions/mean_length": 951.8516235351562, "completions/mean_terminated_length": 600.7680053710938, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 13.419825072886297, "grad_norm": 0.1766524761915207, "learning_rate": 1e-06, "loss": -0.0494, "num_tokens": 837667964.0, "reward": 0.6986607313156128, "reward_std": 0.13075695931911469, "rewards/verify_math_reward/mean": 0.6986607313156128, "rewards/verify_math_reward/std": 0.4590960443019867, "step": 1436 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3731.0, "completions/mean_length": 959.271240234375, "completions/mean_terminated_length": 643.2862548828125, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 13.429154518950437, "grad_norm": 0.1514643430709839, "learning_rate": 1e-06, "loss": -0.0578, "num_tokens": 838278415.0, "reward": 0.7020089626312256, "reward_std": 0.14560948312282562, "rewards/verify_math_reward/mean": 0.7020089030265808, "rewards/verify_math_reward/std": 0.45763099193573, "step": 1437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3858.0, "completions/mean_length": 929.825927734375, "completions/mean_terminated_length": 576.2828979492188, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 13.438483965014576, "grad_norm": 0.15670251846313477, "learning_rate": 1e-06, "loss": -0.0727, "num_tokens": 838834467.0, "reward": 0.7254464626312256, "reward_std": 0.1256481409072876, "rewards/verify_math_reward/mean": 0.7254464030265808, "rewards/verify_math_reward/std": 0.4465382993221283, "step": 1438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3390.0, "completions/mean_length": 1107.852783203125, "completions/mean_terminated_length": 641.316162109375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 13.447813411078718, "grad_norm": 0.14905303716659546, "learning_rate": 1e-06, "loss": -0.0463, "num_tokens": 839427023.0, "reward": 0.65625, "reward_std": 0.1370062381029129, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 1439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3549.0, "completions/mean_length": 1020.1652221679688, "completions/mean_terminated_length": 642.4310913085938, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 13.457142857142857, "grad_norm": 0.1549973040819168, "learning_rate": 1e-06, "loss": -0.0441, "num_tokens": 840023035.0, "reward": 0.7343750596046448, "reward_std": 0.1150607094168663, "rewards/verify_math_reward/mean": 0.734375, "rewards/verify_math_reward/std": 0.44191211462020874, "step": 1440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3567.0, "completions/mean_length": 1083.302490234375, "completions/mean_terminated_length": 630.8177490234375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 13.466472303206997, "grad_norm": 0.1533796489238739, "learning_rate": 1e-06, "loss": -0.0441, "num_tokens": 840606146.0, "reward": 0.6696428656578064, "reward_std": 0.1196403056383133, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 1441 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3679.0, "completions/mean_length": 1055.1429443359375, "completions/mean_terminated_length": 616.2962646484375, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 13.475801749271136, "grad_norm": 0.14904369413852692, "learning_rate": 1e-06, "loss": -0.0299, "num_tokens": 841175442.0, "reward": 0.6897321939468384, "reward_std": 0.11509279161691666, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.4628615975379944, "step": 1442 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3493.0, "completions/mean_length": 964.5011596679688, "completions/mean_terminated_length": 606.17041015625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 13.485131195335278, "grad_norm": 0.17566823959350586, "learning_rate": 1e-06, "loss": -0.0528, "num_tokens": 841762619.0, "reward": 0.6941964626312256, "reward_std": 0.12223109602928162, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.4610042870044708, "step": 1443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2613.0, "completions/mean_length": 997.9442138671875, "completions/mean_terminated_length": 599.9571533203125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 13.494460641399417, "grad_norm": 0.16535988450050354, "learning_rate": 1e-06, "loss": -0.0717, "num_tokens": 842327073.0, "reward": 0.7354910969734192, "reward_std": 0.15297168493270874, "rewards/verify_math_reward/mean": 0.7354910969734192, "rewards/verify_math_reward/std": 0.44131770730018616, "step": 1444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3836.0, "completions/mean_length": 994.22998046875, "completions/mean_terminated_length": 617.6696166992188, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 13.503790087463557, "grad_norm": 0.14937201142311096, "learning_rate": 1e-06, "loss": -0.0528, "num_tokens": 842911535.0, "reward": 0.6986607313156128, "reward_std": 0.12324914336204529, "rewards/verify_math_reward/mean": 0.6986607313156128, "rewards/verify_math_reward/std": 0.4590960443019867, "step": 1445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3827.0, "completions/mean_length": 1036.6273193359375, "completions/mean_terminated_length": 612.9021606445312, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 13.513119533527696, "grad_norm": 0.1665908843278885, "learning_rate": 1e-06, "loss": -0.0476, "num_tokens": 843477753.0, "reward": 0.6484375, "reward_std": 0.10960599780082703, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 1446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3296.0, "completions/mean_length": 1044.5413818359375, "completions/mean_terminated_length": 599.699462890625, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 13.522448979591836, "grad_norm": 0.14764945209026337, "learning_rate": 1e-06, "loss": -0.0493, "num_tokens": 844048662.0, "reward": 0.6774553656578064, "reward_std": 0.11591833829879761, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 1447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3980.0, "completions/mean_length": 967.3404541015625, "completions/mean_terminated_length": 604.9924926757812, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 13.531778425655977, "grad_norm": 0.12813781201839447, "learning_rate": 1e-06, "loss": -0.0178, "num_tokens": 844615471.0, "reward": 0.7176339626312256, "reward_std": 0.09453663229942322, "rewards/verify_math_reward/mean": 0.7176339030265808, "rewards/verify_math_reward/std": 0.4504019320011139, "step": 1448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3590.0, "completions/mean_length": 892.5636596679688, "completions/mean_terminated_length": 578.501220703125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 13.541107871720117, "grad_norm": 0.13819770514965057, "learning_rate": 1e-06, "loss": -0.0672, "num_tokens": 845177208.0, "reward": 0.731026828289032, "reward_std": 0.11858721822500229, "rewards/verify_math_reward/mean": 0.7310267686843872, "rewards/verify_math_reward/std": 0.44367367029190063, "step": 1449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2395.0, "completions/mean_length": 965.0960083007812, "completions/mean_terminated_length": 584.998779296875, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 13.550437317784256, "grad_norm": 0.13451385498046875, "learning_rate": 1e-06, "loss": -0.0218, "num_tokens": 845745110.0, "reward": 0.6975446939468384, "reward_std": 0.09337659925222397, "rewards/verify_math_reward/mean": 0.6975446343421936, "rewards/verify_math_reward/std": 0.45957788825035095, "step": 1450 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3837.0, "completions/mean_length": 968.864990234375, "completions/mean_terminated_length": 597.9812622070312, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 13.559766763848396, "grad_norm": 0.1683139204978943, "learning_rate": 1e-06, "loss": -0.0743, "num_tokens": 846304357.0, "reward": 0.7243303656578064, "reward_std": 0.1473347544670105, "rewards/verify_math_reward/mean": 0.7243303656578064, "rewards/verify_math_reward/std": 0.4471006691455841, "step": 1451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1462053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3013.0, "completions/mean_length": 1088.844970703125, "completions/mean_terminated_length": 573.8941650390625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 13.569096209912537, "grad_norm": 0.19807444512844086, "learning_rate": 1e-06, "loss": -0.0701, "num_tokens": 846826618.0, "reward": 0.7243303656578064, "reward_std": 0.14060692489147186, "rewards/verify_math_reward/mean": 0.7243303656578064, "rewards/verify_math_reward/std": 0.4471006691455841, "step": 1452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3370.0, "completions/mean_length": 969.638427734375, "completions/mean_terminated_length": 585.69921875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 13.578425655976677, "grad_norm": 0.16037462651729584, "learning_rate": 1e-06, "loss": -0.0825, "num_tokens": 847380254.0, "reward": 0.7176339626312256, "reward_std": 0.14448042213916779, "rewards/verify_math_reward/mean": 0.7176339030265808, "rewards/verify_math_reward/std": 0.4504019320011139, "step": 1453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3158.0, "completions/mean_length": 969.98779296875, "completions/mean_terminated_length": 586.0914916992188, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 13.587755102040816, "grad_norm": 0.15379683673381805, "learning_rate": 1e-06, "loss": -0.0293, "num_tokens": 847941467.0, "reward": 0.6886160969734192, "reward_std": 0.1311374455690384, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 1454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3250.0, "completions/mean_length": 987.4688110351562, "completions/mean_terminated_length": 596.9497680664062, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 13.597084548104956, "grad_norm": 0.15186743438243866, "learning_rate": 1e-06, "loss": -0.0819, "num_tokens": 848512911.0, "reward": 0.6752232313156128, "reward_std": 0.14402256906032562, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 1455 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3168.0, "completions/mean_length": 1055.6082763671875, "completions/mean_terminated_length": 630.108154296875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 13.606413994169095, "grad_norm": 0.15735474228858948, "learning_rate": 1e-06, "loss": -0.0273, "num_tokens": 849104552.0, "reward": 0.6696428656578064, "reward_std": 0.13842478394508362, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 1456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4067.0, "completions/mean_length": 1121.7601318359375, "completions/mean_terminated_length": 626.0534057617188, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 13.615743440233237, "grad_norm": 0.20069833099842072, "learning_rate": 1e-06, "loss": -0.0616, "num_tokens": 849671857.0, "reward": 0.6651785969734192, "reward_std": 0.16506867110729218, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219157218933105, "step": 1457 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3162.0, "completions/mean_length": 767.9263916015625, "completions/mean_terminated_length": 537.5823364257812, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 13.625072886297376, "grad_norm": 0.1525256186723709, "learning_rate": 1e-06, "loss": -0.0295, "num_tokens": 850211447.0, "reward": 0.7555803656578064, "reward_std": 0.10525421798229218, "rewards/verify_math_reward/mean": 0.7555803656578064, "rewards/verify_math_reward/std": 0.42998260259628296, "step": 1458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3788.0, "completions/mean_length": 1094.540283203125, "completions/mean_terminated_length": 621.44189453125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 13.634402332361516, "grad_norm": 0.15941284596920013, "learning_rate": 1e-06, "loss": -0.0592, "num_tokens": 850788275.0, "reward": 0.6618303656578064, "reward_std": 0.14440374076366425, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 1459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3863.0, "completions/mean_length": 1075.3148193359375, "completions/mean_terminated_length": 656.9478759765625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 13.643731778425655, "grad_norm": 0.14771853387355804, "learning_rate": 1e-06, "loss": -0.0519, "num_tokens": 851400117.0, "reward": 0.6897321939468384, "reward_std": 0.14128021895885468, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.4628615975379944, "step": 1460 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3597.0, "completions/mean_length": 1057.4029541015625, "completions/mean_terminated_length": 658.3952026367188, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 13.653061224489797, "grad_norm": 0.17699874937534332, "learning_rate": 1e-06, "loss": -0.0922, "num_tokens": 852002078.0, "reward": 0.7299107313156128, "reward_std": 0.1521807461977005, "rewards/verify_math_reward/mean": 0.7299107313156128, "rewards/verify_math_reward/std": 0.44425368309020996, "step": 1461 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3692.0, "completions/mean_length": 1023.5156860351562, "completions/mean_terminated_length": 611.2582397460938, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 13.662390670553936, "grad_norm": 0.1677607297897339, "learning_rate": 1e-06, "loss": -0.0651, "num_tokens": 852576300.0, "reward": 0.707589328289032, "reward_std": 0.136213481426239, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 1462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1517857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3914.0, "completions/mean_length": 1151.743408203125, "completions/mean_terminated_length": 624.8763427734375, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 13.671720116618076, "grad_norm": 0.18875309824943542, "learning_rate": 1e-06, "loss": -0.0868, "num_tokens": 853138646.0, "reward": 0.6618303656578064, "reward_std": 0.16078400611877441, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 1463 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3613.0, "completions/mean_length": 1097.15625, "completions/mean_terminated_length": 597.3489990234375, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 13.681049562682215, "grad_norm": 0.1729128062725067, "learning_rate": 1e-06, "loss": -0.0508, "num_tokens": 853682098.0, "reward": 0.6462053656578064, "reward_std": 0.11884935945272446, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 1464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2944.0, "completions/mean_length": 910.21435546875, "completions/mean_terminated_length": 593.5902099609375, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 13.690379008746355, "grad_norm": 0.18528950214385986, "learning_rate": 1e-06, "loss": -0.0693, "num_tokens": 854262314.0, "reward": 0.7042410969734192, "reward_std": 0.1732921302318573, "rewards/verify_math_reward/mean": 0.7042410969734192, "rewards/verify_math_reward/std": 0.45663803815841675, "step": 1465 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3452.0, "completions/mean_length": 989.396240234375, "completions/mean_terminated_length": 563.6179809570312, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 13.699708454810496, "grad_norm": 0.13456867635250092, "learning_rate": 1e-06, "loss": -0.0538, "num_tokens": 854784813.0, "reward": 0.7522321939468384, "reward_std": 0.10498391091823578, "rewards/verify_math_reward/mean": 0.7522321343421936, "rewards/verify_math_reward/std": 0.4319573938846588, "step": 1466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3645.0, "completions/mean_length": 1025.8125, "completions/mean_terminated_length": 631.405517578125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 13.709037900874636, "grad_norm": 0.1496770977973938, "learning_rate": 1e-06, "loss": -0.0423, "num_tokens": 855370133.0, "reward": 0.7031250596046448, "reward_std": 0.1021307036280632, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 1467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3862.0, "completions/mean_length": 1059.364990234375, "completions/mean_terminated_length": 607.7628173828125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 13.718367346938775, "grad_norm": 0.15127938985824585, "learning_rate": 1e-06, "loss": -0.0707, "num_tokens": 855945708.0, "reward": 0.6350446939468384, "reward_std": 0.12640699744224548, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.4816865026950836, "step": 1468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3119.0, "completions/mean_length": 1113.1942138671875, "completions/mean_terminated_length": 712.9696044921875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 13.727696793002915, "grad_norm": 0.14797398447990417, "learning_rate": 1e-06, "loss": -0.0609, "num_tokens": 856599562.0, "reward": 0.6473214626312256, "reward_std": 0.12527835369110107, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 1469 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3671.0, "completions/mean_length": 1066.50341796875, "completions/mean_terminated_length": 607.0167236328125, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 13.737026239067056, "grad_norm": 0.16321104764938354, "learning_rate": 1e-06, "loss": -0.0891, "num_tokens": 857151533.0, "reward": 0.7098214626312256, "reward_std": 0.1455295979976654, "rewards/verify_math_reward/mean": 0.7098214030265808, "rewards/verify_math_reward/std": 0.454098105430603, "step": 1470 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 1056.72998046875, "completions/mean_terminated_length": 586.7396850585938, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 13.746355685131196, "grad_norm": 0.17276650667190552, "learning_rate": 1e-06, "loss": -0.0828, "num_tokens": 857695179.0, "reward": 0.7087053656578064, "reward_std": 0.14974148571491241, "rewards/verify_math_reward/mean": 0.7087053656578064, "rewards/verify_math_reward/std": 0.45461276173591614, "step": 1471 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3638.0, "completions/mean_length": 1088.8538818359375, "completions/mean_terminated_length": 628.2998657226562, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 13.755685131195335, "grad_norm": 0.14320360124111176, "learning_rate": 1e-06, "loss": -0.0908, "num_tokens": 858282112.0, "reward": 0.7020089626312256, "reward_std": 0.13192880153656006, "rewards/verify_math_reward/mean": 0.7020089030265808, "rewards/verify_math_reward/std": 0.45763102173805237, "step": 1472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1462053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3606.0, "completions/mean_length": 1129.376220703125, "completions/mean_terminated_length": 621.3660278320312, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 13.765014577259475, "grad_norm": 0.18304169178009033, "learning_rate": 1e-06, "loss": -0.0561, "num_tokens": 858841449.0, "reward": 0.6729910969734192, "reward_std": 0.14327581226825714, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 1473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1607142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4082.0, "completions/mean_length": 1172.068115234375, "completions/mean_terminated_length": 612.1661987304688, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 13.774344023323614, "grad_norm": 0.16333621740341187, "learning_rate": 1e-06, "loss": -0.0835, "num_tokens": 859399702.0, "reward": 0.6551339626312256, "reward_std": 0.1410187929868698, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900800228119, "step": 1474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2577.0, "completions/mean_length": 1155.44873046875, "completions/mean_terminated_length": 674.2675170898438, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 13.783673469387756, "grad_norm": 0.16109581291675568, "learning_rate": 1e-06, "loss": -0.0839, "num_tokens": 860012632.0, "reward": 0.6283482313156128, "reward_std": 0.15793219208717346, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159182548523, "step": 1475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3238.0, "completions/mean_length": 882.7969360351562, "completions/mean_terminated_length": 554.7576904296875, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 13.793002915451895, "grad_norm": 0.1812966763973236, "learning_rate": 1e-06, "loss": -0.0665, "num_tokens": 860560178.0, "reward": 0.7455357313156128, "reward_std": 0.13947714865207672, "rewards/verify_math_reward/mean": 0.7455357313156128, "rewards/verify_math_reward/std": 0.4358029067516327, "step": 1476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1517857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3735.0, "completions/mean_length": 1202.2154541015625, "completions/mean_terminated_length": 684.3803100585938, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 13.802332361516035, "grad_norm": 0.14447076618671417, "learning_rate": 1e-06, "loss": -0.0794, "num_tokens": 861175619.0, "reward": 0.6428571939468384, "reward_std": 0.1272311508655548, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 1477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3861.0, "completions/mean_length": 1061.063720703125, "completions/mean_terminated_length": 605.23876953125, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 13.811661807580174, "grad_norm": 0.16366831958293915, "learning_rate": 1e-06, "loss": -0.0854, "num_tokens": 861725540.0, "reward": 0.7399553656578064, "reward_std": 0.1310618817806244, "rewards/verify_math_reward/mean": 0.7399553656578064, "rewards/verify_math_reward/std": 0.43890365958213806, "step": 1478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1462053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2612.0, "completions/mean_length": 1131.1060791015625, "completions/mean_terminated_length": 623.3921508789062, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 13.820991253644316, "grad_norm": 0.16854460537433624, "learning_rate": 1e-06, "loss": -0.0516, "num_tokens": 862289971.0, "reward": 0.6417410969734192, "reward_std": 0.13921934366226196, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975659370422363, "step": 1479 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2919.0, "completions/mean_length": 976.02685546875, "completions/mean_terminated_length": 575.2241821289062, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 13.830320699708455, "grad_norm": 0.1460832953453064, "learning_rate": 1e-06, "loss": -0.055, "num_tokens": 862831107.0, "reward": 0.7566964626312256, "reward_std": 0.11126276105642319, "rewards/verify_math_reward/mean": 0.7566964030265808, "rewards/verify_math_reward/std": 0.4293164908885956, "step": 1480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3637.0, "completions/mean_length": 972.7422485351562, "completions/mean_terminated_length": 628.293701171875, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 13.839650145772595, "grad_norm": 0.1542253941297531, "learning_rate": 1e-06, "loss": -0.0643, "num_tokens": 863420932.0, "reward": 0.7399553656578064, "reward_std": 0.1373753398656845, "rewards/verify_math_reward/mean": 0.7399553656578064, "rewards/verify_math_reward/std": 0.43890365958213806, "step": 1481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2243303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3626.0, "completions/mean_length": 1476.0592041015625, "completions/mean_terminated_length": 718.349609375, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 13.848979591836734, "grad_norm": 0.1680951565504074, "learning_rate": 1e-06, "loss": -0.1322, "num_tokens": 864019969.0, "reward": 0.5792410969734192, "reward_std": 0.14060552418231964, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 1482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3645.0, "completions/mean_length": 1083.993408203125, "completions/mean_terminated_length": 627.1593627929688, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 13.858309037900874, "grad_norm": 0.17295648157596588, "learning_rate": 1e-06, "loss": -0.0516, "num_tokens": 864600995.0, "reward": 0.6473214626312256, "reward_std": 0.14128021895885468, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 1483 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3003.0, "completions/mean_length": 989.58935546875, "completions/mean_terminated_length": 621.1635131835938, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 13.867638483965015, "grad_norm": 0.14881660044193268, "learning_rate": 1e-06, "loss": -0.0691, "num_tokens": 865188115.0, "reward": 0.7042410969734192, "reward_std": 0.11486707627773285, "rewards/verify_math_reward/mean": 0.7042410969734192, "rewards/verify_math_reward/std": 0.45663803815841675, "step": 1484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3379.0, "completions/mean_length": 1001.87841796875, "completions/mean_terminated_length": 591.1542358398438, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 13.876967930029155, "grad_norm": 0.1812649667263031, "learning_rate": 1e-06, "loss": -0.0522, "num_tokens": 865737118.0, "reward": 0.7020089626312256, "reward_std": 0.13775509595870972, "rewards/verify_math_reward/mean": 0.7020089030265808, "rewards/verify_math_reward/std": 0.45763099193573, "step": 1485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1796875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3854.0, "completions/mean_length": 1271.9398193359375, "completions/mean_terminated_length": 653.3360595703125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 13.886297376093294, "grad_norm": 0.16636329889297485, "learning_rate": 1e-06, "loss": -0.0691, "num_tokens": 866304232.0, "reward": 0.6305803656578064, "reward_std": 0.12260904908180237, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 1486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3936.0, "completions/mean_length": 1036.560302734375, "completions/mean_terminated_length": 603.9515991210938, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 13.895626822157434, "grad_norm": 0.1734764277935028, "learning_rate": 1e-06, "loss": -0.0529, "num_tokens": 866867702.0, "reward": 0.6808035969734192, "reward_std": 0.1443263590335846, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.4664256274700165, "step": 1487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3216.0, "completions/mean_length": 960.5692138671875, "completions/mean_terminated_length": 597.4370727539062, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 13.904956268221575, "grad_norm": 0.12487823516130447, "learning_rate": 1e-06, "loss": -0.0207, "num_tokens": 867425180.0, "reward": 0.7433035969734192, "reward_std": 0.0913478210568428, "rewards/verify_math_reward/mean": 0.7433035969734192, "rewards/verify_math_reward/std": 0.43705442547798157, "step": 1488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3535.0, "completions/mean_length": 977.3616333007812, "completions/mean_terminated_length": 611.83544921875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 13.914285714285715, "grad_norm": 0.17821316421031952, "learning_rate": 1e-06, "loss": -0.0811, "num_tokens": 868010784.0, "reward": 0.7031250596046448, "reward_std": 0.1578880101442337, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 1489 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2722.0, "completions/mean_length": 1081.360595703125, "completions/mean_terminated_length": 597.1437377929688, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 13.923615160349854, "grad_norm": 0.17223955690860748, "learning_rate": 1e-06, "loss": -0.0922, "num_tokens": 868560179.0, "reward": 0.7142857313156128, "reward_std": 0.13771234452724457, "rewards/verify_math_reward/mean": 0.7142857313156128, "rewards/verify_math_reward/std": 0.4520062506198883, "step": 1490 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3865.0, "completions/mean_length": 1132.7254638671875, "completions/mean_terminated_length": 674.4871215820312, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 13.932944606413994, "grad_norm": 0.16000701487064362, "learning_rate": 1e-06, "loss": -0.0865, "num_tokens": 869170829.0, "reward": 0.640625, "reward_std": 0.15995843708515167, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 1491 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2537.0, "completions/mean_length": 1114.935302734375, "completions/mean_terminated_length": 609.0104370117188, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 13.942274052478133, "grad_norm": 0.172801673412323, "learning_rate": 1e-06, "loss": -0.1032, "num_tokens": 869726499.0, "reward": 0.6484375, "reward_std": 0.155143141746521, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 1492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3448.0, "completions/mean_length": 908.5167846679688, "completions/mean_terminated_length": 600.3023071289062, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 13.951603498542275, "grad_norm": 0.13795799016952515, "learning_rate": 1e-06, "loss": -0.059, "num_tokens": 870304250.0, "reward": 0.7265625596046448, "reward_std": 0.10551635921001434, "rewards/verify_math_reward/mean": 0.7265625, "rewards/verify_math_reward/std": 0.4459724426269531, "step": 1493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3150.0, "completions/mean_length": 1200.2467041015625, "completions/mean_terminated_length": 663.9960327148438, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 13.960932944606414, "grad_norm": 0.14509567618370056, "learning_rate": 1e-06, "loss": -0.0742, "num_tokens": 870902495.0, "reward": 0.6629464626312256, "reward_std": 0.11727311462163925, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 1494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1685267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3320.0, "completions/mean_length": 1239.3248291015625, "completions/mean_terminated_length": 660.3208618164062, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 13.970262390670554, "grad_norm": 0.182939812541008, "learning_rate": 1e-06, "loss": -0.0798, "num_tokens": 871475522.0, "reward": 0.625, "reward_std": 0.13685287535190582, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1495 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3149.0, "completions/mean_length": 982.6741333007812, "completions/mean_terminated_length": 569.4007568359375, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 13.979591836734693, "grad_norm": 0.18029490113258362, "learning_rate": 1e-06, "loss": -0.0241, "num_tokens": 872011118.0, "reward": 0.715401828289032, "reward_std": 0.12076866626739502, "rewards/verify_math_reward/mean": 0.7154017686843872, "rewards/verify_math_reward/std": 0.4514748752117157, "step": 1496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3952.0, "completions/mean_length": 1170.0301513671875, "completions/mean_terminated_length": 721.9086303710938, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 13.988921282798835, "grad_norm": 0.1886465698480606, "learning_rate": 1e-06, "loss": -0.0671, "num_tokens": 872665089.0, "reward": 0.640625, "reward_std": 0.18220455944538116, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 1497 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.16193181818181823, "completions/max_length": 4096.0, "completions/max_terminated_length": 3782.0, "completions/mean_length": 1186.0994873046875, "completions/mean_terminated_length": 623.8474731445312, "completions/min_length": 195.0, "completions/min_terminated_length": 195.0, "epoch": 13.998250728862974, "grad_norm": 0.13559775054454803, "learning_rate": 1e-06, "loss": -0.075, "num_tokens": 873257060.0, "reward": 0.7254464626312256, "reward_std": 0.12050722539424896, "rewards/verify_math_reward/mean": 0.7254464030265808, "rewards/verify_math_reward/std": 0.4465382993221283, "step": 1498 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1618303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3601.0, "completions/mean_length": 1204.75341796875, "completions/mean_terminated_length": 646.5232543945312, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 14.00932944606414, "grad_norm": 0.16991333663463593, "learning_rate": 1e-06, "loss": -0.0815, "num_tokens": 873830511.0, "reward": 0.6506696939468384, "reward_std": 0.15500116348266602, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 1499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3950.0, "completions/mean_length": 1000.825927734375, "completions/mean_terminated_length": 638.0499267578125, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 14.018658892128279, "grad_norm": 0.15517744421958923, "learning_rate": 1e-06, "loss": -0.0723, "num_tokens": 874437915.0, "reward": 0.7399553656578064, "reward_std": 0.15308158099651337, "rewards/verify_math_reward/mean": 0.7399553656578064, "rewards/verify_math_reward/std": 0.43890365958213806, "step": 1500 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3814.0, "completions/mean_length": 1165.8895263671875, "completions/mean_terminated_length": 686.4168701171875, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 14.02798833819242, "grad_norm": 0.1750682294368744, "learning_rate": 1e-06, "loss": -0.0783, "num_tokens": 875064984.0, "reward": 0.6082589626312256, "reward_std": 0.14733335375785828, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841196298599243, "step": 1501 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3283.0, "completions/mean_length": 1093.552490234375, "completions/mean_terminated_length": 686.3764038085938, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 14.03731778425656, "grad_norm": 0.18799026310443878, "learning_rate": 1e-06, "loss": -0.0757, "num_tokens": 875687663.0, "reward": 0.7053571939468384, "reward_std": 0.18655773997306824, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613667368888855, "step": 1502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3575.0, "completions/mean_length": 905.5491333007812, "completions/mean_terminated_length": 655.9951782226562, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 14.0466472303207, "grad_norm": 0.1436869353055954, "learning_rate": 1e-06, "loss": -0.0244, "num_tokens": 876322875.0, "reward": 0.7433035969734192, "reward_std": 0.1352698653936386, "rewards/verify_math_reward/mean": 0.7433035969734192, "rewards/verify_math_reward/std": 0.43705442547798157, "step": 1503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3835.0, "completions/mean_length": 970.1395263671875, "completions/mean_terminated_length": 638.258056640625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 14.055976676384839, "grad_norm": 0.16448047757148743, "learning_rate": 1e-06, "loss": -0.0412, "num_tokens": 876925024.0, "reward": 0.6886160969734192, "reward_std": 0.14556489884853363, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 1504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3696.0, "completions/mean_length": 993.7623291015625, "completions/mean_terminated_length": 664.388916015625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 14.06530612244898, "grad_norm": 0.1584680825471878, "learning_rate": 1e-06, "loss": -0.0469, "num_tokens": 877550619.0, "reward": 0.6852678656578064, "reward_std": 0.12084353715181351, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 1505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3945.0, "completions/mean_length": 1075.9654541015625, "completions/mean_terminated_length": 617.9138793945312, "completions/min_length": 200.0, "completions/min_terminated_length": 200.0, "epoch": 14.07463556851312, "grad_norm": 0.15366147458553314, "learning_rate": 1e-06, "loss": -0.0471, "num_tokens": 878126124.0, "reward": 0.6875000596046448, "reward_std": 0.10081617534160614, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 1506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3310.0, "completions/mean_length": 1155.24560546875, "completions/mean_terminated_length": 656.161865234375, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 14.08396501457726, "grad_norm": 0.20653975009918213, "learning_rate": 1e-06, "loss": -0.0896, "num_tokens": 878716920.0, "reward": 0.699776828289032, "reward_std": 0.1604897677898407, "rewards/verify_math_reward/mean": 0.6997767686843872, "rewards/verify_math_reward/std": 0.4586109220981598, "step": 1507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3942.0, "completions/mean_length": 1037.23779296875, "completions/mean_terminated_length": 609.1666870117188, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 14.093294460641399, "grad_norm": 0.17542661726474762, "learning_rate": 1e-06, "loss": -0.0817, "num_tokens": 879283629.0, "reward": 0.7254464626312256, "reward_std": 0.11956332623958588, "rewards/verify_math_reward/mean": 0.7254464030265808, "rewards/verify_math_reward/std": 0.4465382993221283, "step": 1508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3865.0, "completions/mean_length": 1094.8460693359375, "completions/mean_terminated_length": 635.2097778320312, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 14.102623906705539, "grad_norm": 0.15363480150699615, "learning_rate": 1e-06, "loss": -0.0577, "num_tokens": 879868619.0, "reward": 0.723214328289032, "reward_std": 0.12283728271722794, "rewards/verify_math_reward/mean": 0.7232142686843872, "rewards/verify_math_reward/std": 0.44765952229499817, "step": 1509 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4085.0, "completions/mean_length": 952.7857666015625, "completions/mean_terminated_length": 606.1362915039062, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 14.11195335276968, "grad_norm": 0.1422143131494522, "learning_rate": 1e-06, "loss": -0.0893, "num_tokens": 880440003.0, "reward": 0.7566964626312256, "reward_std": 0.13737492263317108, "rewards/verify_math_reward/mean": 0.7566964030265808, "rewards/verify_math_reward/std": 0.4293164908885956, "step": 1510 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1395089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4035.0, "completions/mean_length": 1175.766845703125, "completions/mean_terminated_length": 702.3177490234375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 14.12128279883382, "grad_norm": 0.16948352754116058, "learning_rate": 1e-06, "loss": -0.0889, "num_tokens": 881065042.0, "reward": 0.6696428656578064, "reward_std": 0.14553099870681763, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 1511 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3901.0, "completions/mean_length": 874.0279541015625, "completions/mean_terminated_length": 605.20556640625, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 14.130612244897959, "grad_norm": 0.12992741167545319, "learning_rate": 1e-06, "loss": -0.0284, "num_tokens": 881645867.0, "reward": 0.770089328289032, "reward_std": 0.09897328168153763, "rewards/verify_math_reward/mean": 0.7700892686843872, "rewards/verify_math_reward/std": 0.42101022601127625, "step": 1512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2722.0, "completions/mean_length": 982.9777221679688, "completions/mean_terminated_length": 635.3697509765625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 14.139941690962099, "grad_norm": 0.15940600633621216, "learning_rate": 1e-06, "loss": -0.0724, "num_tokens": 882251823.0, "reward": 0.6707589626312256, "reward_std": 0.1349353790283203, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 1513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3935.0, "completions/mean_length": 1167.6239013671875, "completions/mean_terminated_length": 684.00390625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 14.14927113702624, "grad_norm": 0.13552004098892212, "learning_rate": 1e-06, "loss": -0.0607, "num_tokens": 882871990.0, "reward": 0.6897321939468384, "reward_std": 0.11257727444171906, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.462861567735672, "step": 1514 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3499.0, "completions/mean_length": 1069.2054443359375, "completions/mean_terminated_length": 722.855712890625, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 14.15860058309038, "grad_norm": 0.14533638954162598, "learning_rate": 1e-06, "loss": -0.055, "num_tokens": 883543606.0, "reward": 0.6830357313156128, "reward_std": 0.12610459327697754, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3376.0, "completions/mean_length": 998.6563110351562, "completions/mean_terminated_length": 618.2807006835938, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 14.167930029154519, "grad_norm": 0.1805475354194641, "learning_rate": 1e-06, "loss": -0.0808, "num_tokens": 884126258.0, "reward": 0.7455357313156128, "reward_std": 0.11415030062198639, "rewards/verify_math_reward/mean": 0.7455357313156128, "rewards/verify_math_reward/std": 0.4358029067516327, "step": 1516 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2965.0, "completions/mean_length": 895.5413208007812, "completions/mean_terminated_length": 624.3159790039062, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 14.177259475218658, "grad_norm": 0.16091904044151306, "learning_rate": 1e-06, "loss": -0.0626, "num_tokens": 884730135.0, "reward": 0.7444196939468384, "reward_std": 0.14353542029857635, "rewards/verify_math_reward/mean": 0.7444196343421936, "rewards/verify_math_reward/std": 0.43643051385879517, "step": 1517 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3298.0, "completions/mean_length": 1129.2210693359375, "completions/mean_terminated_length": 679.2467651367188, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 14.186588921282798, "grad_norm": 0.14764942228794098, "learning_rate": 1e-06, "loss": -0.0643, "num_tokens": 885344029.0, "reward": 0.6629464626312256, "reward_std": 0.12869539856910706, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 1518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3612.0, "completions/mean_length": 1137.0670166015625, "completions/mean_terminated_length": 612.1576538085938, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 14.19591836734694, "grad_norm": 0.1579299420118332, "learning_rate": 1e-06, "loss": -0.067, "num_tokens": 885902873.0, "reward": 0.6729910969734192, "reward_std": 0.13421790301799774, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 1519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2083.0, "completions/mean_length": 909.7801513671875, "completions/mean_terminated_length": 575.8359985351562, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 14.205247813411079, "grad_norm": 0.17138025164604187, "learning_rate": 1e-06, "loss": -0.0638, "num_tokens": 886459188.0, "reward": 0.7555803656578064, "reward_std": 0.1418115496635437, "rewards/verify_math_reward/mean": 0.7555803656578064, "rewards/verify_math_reward/std": 0.42998260259628296, "step": 1520 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2619.0, "completions/mean_length": 1050.2913818359375, "completions/mean_terminated_length": 615.1900634765625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 14.214577259475218, "grad_norm": 0.16122666001319885, "learning_rate": 1e-06, "loss": -0.088, "num_tokens": 887026833.0, "reward": 0.7008928656578064, "reward_std": 0.14591076970100403, "rewards/verify_math_reward/mean": 0.7008928656578064, "rewards/verify_math_reward/std": 0.4581226110458374, "step": 1521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3673.0, "completions/mean_length": 1079.5491943359375, "completions/mean_terminated_length": 674.8101196289062, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 14.223906705539358, "grad_norm": 0.1701480895280838, "learning_rate": 1e-06, "loss": -0.0556, "num_tokens": 887641469.0, "reward": 0.7098214626312256, "reward_std": 0.14504244923591614, "rewards/verify_math_reward/mean": 0.7098214030265808, "rewards/verify_math_reward/std": 0.454098105430603, "step": 1522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 1110.8270263671875, "completions/mean_terminated_length": 684.3737182617188, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 14.2332361516035, "grad_norm": 0.1491316556930542, "learning_rate": 1e-06, "loss": -0.0637, "num_tokens": 888266314.0, "reward": 0.6674107313156128, "reward_std": 0.12756884098052979, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 1523 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 1054.857177734375, "completions/mean_terminated_length": 642.4334716796875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 14.242565597667639, "grad_norm": 0.14813874661922455, "learning_rate": 1e-06, "loss": -0.05, "num_tokens": 888861650.0, "reward": 0.6941964626312256, "reward_std": 0.13451918959617615, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.4610042870044708, "step": 1524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3804.0, "completions/mean_length": 1051.654052734375, "completions/mean_terminated_length": 647.5372924804688, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 14.251895043731778, "grad_norm": 0.15813909471035004, "learning_rate": 1e-06, "loss": -0.0418, "num_tokens": 889474732.0, "reward": 0.7209821939468384, "reward_std": 0.11509419232606888, "rewards/verify_math_reward/mean": 0.7209821343421936, "rewards/verify_math_reward/std": 0.448766827583313, "step": 1525 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3036.0, "completions/mean_length": 962.4832763671875, "completions/mean_terminated_length": 612.5868530273438, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 14.261224489795918, "grad_norm": 0.16052603721618652, "learning_rate": 1e-06, "loss": -0.0601, "num_tokens": 890058717.0, "reward": 0.707589328289032, "reward_std": 0.12787306308746338, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 1526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3720.0, "completions/mean_length": 1040.884033203125, "completions/mean_terminated_length": 669.9874877929688, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 14.270553935860057, "grad_norm": 0.1593150645494461, "learning_rate": 1e-06, "loss": -0.0609, "num_tokens": 890691853.0, "reward": 0.707589328289032, "reward_std": 0.15379583835601807, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 1527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.2131696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3330.0, "completions/mean_length": 1411.5001220703125, "completions/mean_terminated_length": 684.2098999023438, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 14.279883381924199, "grad_norm": 0.17011617124080658, "learning_rate": 1e-06, "loss": -0.0652, "num_tokens": 891266933.0, "reward": 0.5915178656578064, "reward_std": 0.13072487711906433, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182766675949097, "step": 1528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1484375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3369.0, "completions/mean_length": 1188.8638916015625, "completions/mean_terminated_length": 682.1153564453125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 14.289212827988338, "grad_norm": 0.15505503118038177, "learning_rate": 1e-06, "loss": -0.103, "num_tokens": 891868107.0, "reward": 0.6741071939468384, "reward_std": 0.1557818502187729, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 1529 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3279.0, "completions/mean_length": 896.1663208007812, "completions/mean_terminated_length": 595.3272705078125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 14.298542274052478, "grad_norm": 0.16419577598571777, "learning_rate": 1e-06, "loss": -0.0766, "num_tokens": 892428248.0, "reward": 0.7968750596046448, "reward_std": 0.1342952847480774, "rewards/verify_math_reward/mean": 0.796875, "rewards/verify_math_reward/std": 0.40254947543144226, "step": 1530 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3749.0, "completions/mean_length": 1001.2266235351562, "completions/mean_terminated_length": 651.382568359375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 14.307871720116617, "grad_norm": 0.17623595893383026, "learning_rate": 1e-06, "loss": -0.0362, "num_tokens": 893040115.0, "reward": 0.7321428656578064, "reward_std": 0.13906781375408173, "rewards/verify_math_reward/mean": 0.7321428656578064, "rewards/verify_math_reward/std": 0.4430900514125824, "step": 1531 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3595.0, "completions/mean_length": 979.8917846679688, "completions/mean_terminated_length": 657.5357055664062, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 14.317201166180759, "grad_norm": 0.1409258395433426, "learning_rate": 1e-06, "loss": -0.0725, "num_tokens": 893667378.0, "reward": 0.7109375596046448, "reward_std": 0.12076614797115326, "rewards/verify_math_reward/mean": 0.7109375, "rewards/verify_math_reward/std": 0.45358020067214966, "step": 1532 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4027.0, "completions/mean_length": 937.536865234375, "completions/mean_terminated_length": 623.6282348632812, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 14.326530612244898, "grad_norm": 0.14756691455841064, "learning_rate": 1e-06, "loss": -0.0823, "num_tokens": 894255699.0, "reward": 0.6941964626312256, "reward_std": 0.13955524563789368, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.4610042870044708, "step": 1533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3925.0, "completions/mean_length": 1114.8973388671875, "completions/mean_terminated_length": 702.0126953125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 14.335860058309038, "grad_norm": 0.1667661964893341, "learning_rate": 1e-06, "loss": -0.0709, "num_tokens": 894896527.0, "reward": 0.6863839626312256, "reward_std": 0.16645805537700653, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422144770622253, "step": 1534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1395089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 1143.118408203125, "completions/mean_terminated_length": 664.3761596679688, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 14.345189504373177, "grad_norm": 0.1445925533771515, "learning_rate": 1e-06, "loss": -0.0445, "num_tokens": 895499881.0, "reward": 0.6796875596046448, "reward_std": 0.13151581585407257, "rewards/verify_math_reward/mean": 0.6796875, "rewards/verify_math_reward/std": 0.4668572247028351, "step": 1535 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3562.0, "completions/mean_length": 999.8381958007812, "completions/mean_terminated_length": 628.2987060546875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 14.354518950437317, "grad_norm": 0.14818841218948364, "learning_rate": 1e-06, "loss": -0.0966, "num_tokens": 896085456.0, "reward": 0.7500000596046448, "reward_std": 0.15082639455795288, "rewards/verify_math_reward/mean": 0.75, "rewards/verify_math_reward/std": 0.43325456976890564, "step": 1536 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3640.0, "completions/mean_length": 1057.69873046875, "completions/mean_terminated_length": 693.1024780273438, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 14.363848396501458, "grad_norm": 0.15976668894290924, "learning_rate": 1e-06, "loss": -0.045, "num_tokens": 896739170.0, "reward": 0.6495535969734192, "reward_std": 0.13305744528770447, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 1537 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3771.0, "completions/mean_length": 1046.654052734375, "completions/mean_terminated_length": 637.5012817382812, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 14.373177842565598, "grad_norm": 0.16981826722621918, "learning_rate": 1e-06, "loss": -0.086, "num_tokens": 897323244.0, "reward": 0.699776828289032, "reward_std": 0.13842660188674927, "rewards/verify_math_reward/mean": 0.6997767686843872, "rewards/verify_math_reward/std": 0.4586109220981598, "step": 1538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4023.0, "completions/mean_length": 1058.48779296875, "completions/mean_terminated_length": 637.7903442382812, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 14.382507288629737, "grad_norm": 0.1528918594121933, "learning_rate": 1e-06, "loss": -0.0514, "num_tokens": 897909761.0, "reward": 0.7220982313156128, "reward_std": 0.11137335002422333, "rewards/verify_math_reward/mean": 0.7220982313156128, "rewards/verify_math_reward/std": 0.44821488857269287, "step": 1539 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3565.0, "completions/mean_length": 916.654052734375, "completions/mean_terminated_length": 566.0198364257812, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 14.391836734693877, "grad_norm": 0.17690247297286987, "learning_rate": 1e-06, "loss": -0.0686, "num_tokens": 898449139.0, "reward": 0.7611607313156128, "reward_std": 0.14083515107631683, "rewards/verify_math_reward/mean": 0.7611607313156128, "rewards/verify_math_reward/std": 0.4266124963760376, "step": 1540 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2937.0, "completions/mean_length": 1058.8070068359375, "completions/mean_terminated_length": 694.34375, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 14.401166180758018, "grad_norm": 0.15754111111164093, "learning_rate": 1e-06, "loss": -0.0539, "num_tokens": 899099566.0, "reward": 0.6573660969734192, "reward_std": 0.15713688731193542, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 1541 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3921.0, "completions/mean_length": 992.7310791015625, "completions/mean_terminated_length": 633.3237915039062, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 14.410495626822158, "grad_norm": 0.17619124054908752, "learning_rate": 1e-06, "loss": -0.0619, "num_tokens": 899688805.0, "reward": 0.7008928656578064, "reward_std": 0.1445143222808838, "rewards/verify_math_reward/mean": 0.7008928656578064, "rewards/verify_math_reward/std": 0.4581226110458374, "step": 1542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3473.0, "completions/mean_length": 924.7980346679688, "completions/mean_terminated_length": 672.630126953125, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 14.419825072886297, "grad_norm": 0.14695429801940918, "learning_rate": 1e-06, "loss": -0.0651, "num_tokens": 900328752.0, "reward": 0.7332589626312256, "reward_std": 0.13801473379135132, "rewards/verify_math_reward/mean": 0.7332589030265808, "rewards/verify_math_reward/std": 0.4425029158592224, "step": 1543 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4005.0, "completions/mean_length": 1109.009033203125, "completions/mean_terminated_length": 638.1912231445312, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 14.429154518950437, "grad_norm": 0.17023196816444397, "learning_rate": 1e-06, "loss": -0.0664, "num_tokens": 900924840.0, "reward": 0.6875000596046448, "reward_std": 0.1418108493089676, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 1544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3260.0, "completions/mean_length": 1106.719970703125, "completions/mean_terminated_length": 626.576416015625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 14.438483965014576, "grad_norm": 0.1465475857257843, "learning_rate": 1e-06, "loss": -0.0838, "num_tokens": 901497621.0, "reward": 0.6886160969734192, "reward_std": 0.13470646739006042, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 1545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3884.0, "completions/mean_length": 1126.852783203125, "completions/mean_terminated_length": 672.118408203125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 14.447813411078718, "grad_norm": 0.1621505469083786, "learning_rate": 1e-06, "loss": -0.0399, "num_tokens": 902102737.0, "reward": 0.6584821939468384, "reward_std": 0.15003502368927002, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 1546 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3308.0, "completions/mean_length": 964.8482666015625, "completions/mean_terminated_length": 602.211669921875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 14.457142857142857, "grad_norm": 0.15574316680431366, "learning_rate": 1e-06, "loss": -0.0396, "num_tokens": 902680385.0, "reward": 0.6595982313156128, "reward_std": 0.11707949638366699, "rewards/verify_math_reward/mean": 0.6595982313156128, "rewards/verify_math_reward/std": 0.4741089344024658, "step": 1547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3254.0, "completions/mean_length": 1123.61279296875, "completions/mean_terminated_length": 659.5368041992188, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 14.466472303206997, "grad_norm": 0.14102943241596222, "learning_rate": 1e-06, "loss": -0.0413, "num_tokens": 903293982.0, "reward": 0.6729910969734192, "reward_std": 0.11742466688156128, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 1548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3676.0, "completions/mean_length": 997.1295166015625, "completions/mean_terminated_length": 629.5979614257812, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 14.475801749271136, "grad_norm": 0.18004238605499268, "learning_rate": 1e-06, "loss": -0.0886, "num_tokens": 903879570.0, "reward": 0.7131696939468384, "reward_std": 0.14571714401245117, "rewards/verify_math_reward/mean": 0.7131696343421936, "rewards/verify_math_reward/std": 0.4525342583656311, "step": 1549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3689.0, "completions/mean_length": 988.8761596679688, "completions/mean_terminated_length": 629.0223999023438, "completions/min_length": 204.0, "completions/min_terminated_length": 204.0, "epoch": 14.485131195335278, "grad_norm": 0.15630412101745605, "learning_rate": 1e-06, "loss": -0.0411, "num_tokens": 904483755.0, "reward": 0.7031250596046448, "reward_std": 0.12430264800786972, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 1550 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3797.0, "completions/mean_length": 998.4922485351562, "completions/mean_terminated_length": 635.4426879882812, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 14.494460641399417, "grad_norm": 0.14220628142356873, "learning_rate": 1e-06, "loss": -0.0558, "num_tokens": 905078524.0, "reward": 0.707589328289032, "reward_std": 0.12910866737365723, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 1551 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3811.0, "completions/mean_length": 1129.1217041015625, "completions/mean_terminated_length": 643.6324462890625, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 14.503790087463557, "grad_norm": 0.15278203785419464, "learning_rate": 1e-06, "loss": -0.0466, "num_tokens": 905668137.0, "reward": 0.6729910969734192, "reward_std": 0.12497483193874359, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 1552 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3005.0, "completions/mean_length": 1088.0123291015625, "completions/mean_terminated_length": 618.3781127929688, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 14.513119533527696, "grad_norm": 0.16624118387699127, "learning_rate": 1e-06, "loss": -0.0698, "num_tokens": 906236044.0, "reward": 0.6953125596046448, "reward_std": 0.1433524787425995, "rewards/verify_math_reward/mean": 0.6953125, "rewards/verify_math_reward/std": 0.4605320394039154, "step": 1553 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2782.0, "completions/mean_length": 1128.622802734375, "completions/mean_terminated_length": 638.561767578125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 14.522448979591836, "grad_norm": 0.16740426421165466, "learning_rate": 1e-06, "loss": -0.0791, "num_tokens": 906817562.0, "reward": 0.6584821939468384, "reward_std": 0.16070660948753357, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 1554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3801.0, "completions/mean_length": 1035.53466796875, "completions/mean_terminated_length": 646.7207641601562, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 14.531778425655977, "grad_norm": 0.14488689601421356, "learning_rate": 1e-06, "loss": -0.049, "num_tokens": 907430233.0, "reward": 0.6830357313156128, "reward_std": 0.09882382303476334, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1555 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3761.0, "completions/mean_length": 1152.173095703125, "completions/mean_terminated_length": 657.057373046875, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 14.541107871720117, "grad_norm": 0.1420653611421585, "learning_rate": 1e-06, "loss": -0.0542, "num_tokens": 908029540.0, "reward": 0.6473214626312256, "reward_std": 0.1204291433095932, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 1556 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2787.0, "completions/mean_length": 931.9989013671875, "completions/mean_terminated_length": 617.5398559570312, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 14.550437317784256, "grad_norm": 0.16833344101905823, "learning_rate": 1e-06, "loss": -0.0701, "num_tokens": 908619283.0, "reward": 0.7377232313156128, "reward_std": 0.15213686227798462, "rewards/verify_math_reward/mean": 0.7377232313156128, "rewards/verify_math_reward/std": 0.4401180148124695, "step": 1557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4065.0, "completions/mean_length": 877.1685791015625, "completions/mean_terminated_length": 561.5968627929688, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 14.559766763848396, "grad_norm": 0.16206017136573792, "learning_rate": 1e-06, "loss": -0.0463, "num_tokens": 909156954.0, "reward": 0.7633928656578064, "reward_std": 0.11607100814580917, "rewards/verify_math_reward/mean": 0.7633928656578064, "rewards/verify_math_reward/std": 0.42523646354675293, "step": 1558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3649.0, "completions/mean_length": 986.0670166015625, "completions/mean_terminated_length": 604.1453857421875, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 14.569096209912537, "grad_norm": 0.1452341228723526, "learning_rate": 1e-06, "loss": -0.0663, "num_tokens": 909729422.0, "reward": 0.6875000596046448, "reward_std": 0.12110455334186554, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 1559 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1774553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3493.0, "completions/mean_length": 1254.4453125, "completions/mean_terminated_length": 641.4097900390625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 14.578425655976677, "grad_norm": 0.18581481277942657, "learning_rate": 1e-06, "loss": -0.044, "num_tokens": 910290309.0, "reward": 0.6551339626312256, "reward_std": 0.11114581674337387, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 1560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3994.0, "completions/mean_length": 894.2522583007812, "completions/mean_terminated_length": 554.3135986328125, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 14.587755102040816, "grad_norm": 0.1467188447713852, "learning_rate": 1e-06, "loss": -0.0578, "num_tokens": 910826623.0, "reward": 0.770089328289032, "reward_std": 0.10212958604097366, "rewards/verify_math_reward/mean": 0.7700892686843872, "rewards/verify_math_reward/std": 0.42101022601127625, "step": 1561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3756.0, "completions/mean_length": 995.482177734375, "completions/mean_terminated_length": 627.7553100585938, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 14.597084548104956, "grad_norm": 0.15646781027317047, "learning_rate": 1e-06, "loss": -0.0431, "num_tokens": 911423431.0, "reward": 0.691964328289032, "reward_std": 0.13478271663188934, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 1562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3537.0, "completions/mean_length": 1005.8582763671875, "completions/mean_terminated_length": 600.0820922851562, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 14.606413994169095, "grad_norm": 0.1558239459991455, "learning_rate": 1e-06, "loss": -0.0935, "num_tokens": 911984016.0, "reward": 0.7031250596046448, "reward_std": 0.13711389899253845, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 1563 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3643.0, "completions/mean_length": 1046.693115234375, "completions/mean_terminated_length": 637.5455932617188, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 14.615743440233237, "grad_norm": 0.16422036290168762, "learning_rate": 1e-06, "loss": -0.0585, "num_tokens": 912577133.0, "reward": 0.6897321939468384, "reward_std": 0.12749329209327698, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.4628615975379944, "step": 1564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3837.0, "completions/mean_length": 955.872802734375, "completions/mean_terminated_length": 587.8279418945312, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 14.625072886297376, "grad_norm": 0.16404128074645996, "learning_rate": 1e-06, "loss": -0.0553, "num_tokens": 913137131.0, "reward": 0.7500000596046448, "reward_std": 0.1285124570131302, "rewards/verify_math_reward/mean": 0.75, "rewards/verify_math_reward/std": 0.43325456976890564, "step": 1565 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3736.0, "completions/mean_length": 1022.6160888671875, "completions/mean_terminated_length": 592.4987182617188, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 14.634402332361516, "grad_norm": 0.18624532222747803, "learning_rate": 1e-06, "loss": -0.0383, "num_tokens": 913687683.0, "reward": 0.7064732313156128, "reward_std": 0.11791320890188217, "rewards/verify_math_reward/mean": 0.7064732313156128, "rewards/verify_math_reward/std": 0.4556320011615753, "step": 1566 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3080.0, "completions/mean_length": 856.8516235351562, "completions/mean_terminated_length": 565.2493896484375, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 14.643731778425655, "grad_norm": 0.14966975152492523, "learning_rate": 1e-06, "loss": -0.0118, "num_tokens": 914248390.0, "reward": 0.7477678656578064, "reward_std": 0.09003441780805588, "rewards/verify_math_reward/mean": 0.7477678656578064, "rewards/verify_math_reward/std": 0.4345363676548004, "step": 1567 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3904.0, "completions/mean_length": 1132.591552734375, "completions/mean_terminated_length": 643.1859741210938, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 14.653061224489797, "grad_norm": 0.19849668443202972, "learning_rate": 1e-06, "loss": -0.0756, "num_tokens": 914825608.0, "reward": 0.707589328289032, "reward_std": 0.15149927139282227, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 1568 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3862.0, "completions/mean_length": 1101.75341796875, "completions/mean_terminated_length": 669.6334228515625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 14.662390670553936, "grad_norm": 0.15468859672546387, "learning_rate": 1e-06, "loss": -0.0713, "num_tokens": 915450907.0, "reward": 0.660714328289032, "reward_std": 0.13508442044258118, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 1569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3328.0, "completions/mean_length": 1000.5625610351562, "completions/mean_terminated_length": 602.9118041992188, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 14.671720116618076, "grad_norm": 0.1583516001701355, "learning_rate": 1e-06, "loss": -0.0331, "num_tokens": 916016699.0, "reward": 0.6852678656578064, "reward_std": 0.11791251599788666, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.4646684527397156, "step": 1570 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3829.0, "completions/mean_length": 940.700927734375, "completions/mean_terminated_length": 618.5731811523438, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 14.681049562682215, "grad_norm": 0.15199647843837738, "learning_rate": 1e-06, "loss": -0.0362, "num_tokens": 916611887.0, "reward": 0.7243303656578064, "reward_std": 0.11580956727266312, "rewards/verify_math_reward/mean": 0.7243303656578064, "rewards/verify_math_reward/std": 0.4471006691455841, "step": 1571 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3351.0, "completions/mean_length": 1022.9319458007812, "completions/mean_terminated_length": 565.9115600585938, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 14.690379008746355, "grad_norm": 0.1610073745250702, "learning_rate": 1e-06, "loss": -0.0395, "num_tokens": 917150794.0, "reward": 0.6863839626312256, "reward_std": 0.12512819468975067, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.4642214775085449, "step": 1572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3761.0, "completions/mean_length": 1157.2054443359375, "completions/mean_terminated_length": 698.3742065429688, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 14.699708454810496, "grad_norm": 0.19162149727344513, "learning_rate": 1e-06, "loss": -0.0787, "num_tokens": 917783274.0, "reward": 0.578125, "reward_std": 0.18994270265102386, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 1573 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3203.0, "completions/mean_length": 959.8817138671875, "completions/mean_terminated_length": 579.1514282226562, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 14.709037900874636, "grad_norm": 0.16119384765625, "learning_rate": 1e-06, "loss": -0.0744, "num_tokens": 918333752.0, "reward": 0.723214328289032, "reward_std": 0.12069015204906464, "rewards/verify_math_reward/mean": 0.7232142686843872, "rewards/verify_math_reward/std": 0.44765952229499817, "step": 1574 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 943.60498046875, "completions/mean_terminated_length": 569.725341796875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 14.718367346938775, "grad_norm": 0.155434712767601, "learning_rate": 1e-06, "loss": -0.0396, "num_tokens": 918871678.0, "reward": 0.7343750596046448, "reward_std": 0.12125540524721146, "rewards/verify_math_reward/mean": 0.734375, "rewards/verify_math_reward/std": 0.44191211462020874, "step": 1575 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3191.0, "completions/mean_length": 1045.1507568359375, "completions/mean_terminated_length": 604.862060546875, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 14.727696793002915, "grad_norm": 0.17898200452327728, "learning_rate": 1e-06, "loss": -0.0573, "num_tokens": 919451973.0, "reward": 0.676339328289032, "reward_std": 0.15406078100204468, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 1576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3975.0, "completions/mean_length": 973.0402221679688, "completions/mean_terminated_length": 607.0075073242188, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 14.737026239067056, "grad_norm": 0.17064312100410461, "learning_rate": 1e-06, "loss": -0.0791, "num_tokens": 920038121.0, "reward": 0.7176339626312256, "reward_std": 0.13369613885879517, "rewards/verify_math_reward/mean": 0.7176339030265808, "rewards/verify_math_reward/std": 0.4504019320011139, "step": 1577 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3654.0, "completions/mean_length": 840.5725708007812, "completions/mean_terminated_length": 560.4085083007812, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 14.746355685131196, "grad_norm": 0.18756458163261414, "learning_rate": 1e-06, "loss": -0.0492, "num_tokens": 920581010.0, "reward": 0.7500000596046448, "reward_std": 0.16217227280139923, "rewards/verify_math_reward/mean": 0.75, "rewards/verify_math_reward/std": 0.43325456976890564, "step": 1578 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3015.0, "completions/mean_length": 864.1495971679688, "completions/mean_terminated_length": 573.204345703125, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 14.755685131195335, "grad_norm": 0.1540251523256302, "learning_rate": 1e-06, "loss": -0.0542, "num_tokens": 921147272.0, "reward": 0.7611607313156128, "reward_std": 0.11565801501274109, "rewards/verify_math_reward/mean": 0.7611607313156128, "rewards/verify_math_reward/std": 0.4266124963760376, "step": 1579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2541.0, "completions/mean_length": 919.21435546875, "completions/mean_terminated_length": 564.486328125, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 14.765014577259475, "grad_norm": 0.14839625358581543, "learning_rate": 1e-06, "loss": -0.06, "num_tokens": 921693040.0, "reward": 0.723214328289032, "reward_std": 0.11637409776449203, "rewards/verify_math_reward/mean": 0.7232142686843872, "rewards/verify_math_reward/std": 0.44765952229499817, "step": 1580 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3859.0, "completions/mean_length": 1147.1763916015625, "completions/mean_terminated_length": 682.3746948242188, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 14.774344023323614, "grad_norm": 0.18178090453147888, "learning_rate": 1e-06, "loss": -0.0747, "num_tokens": 922322734.0, "reward": 0.6116071939468384, "reward_std": 0.15785479545593262, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.4876568913459778, "step": 1581 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 933.2076416015625, "completions/mean_terminated_length": 593.080322265625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 14.783673469387756, "grad_norm": 0.1612182855606079, "learning_rate": 1e-06, "loss": -0.0352, "num_tokens": 922894904.0, "reward": 0.652901828289032, "reward_std": 0.11829410493373871, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631317377090454, "step": 1582 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2877.0, "completions/mean_length": 955.8984985351562, "completions/mean_terminated_length": 548.0416259765625, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 14.793002915451895, "grad_norm": 0.15704992413520813, "learning_rate": 1e-06, "loss": -0.0651, "num_tokens": 923414341.0, "reward": 0.7656250596046448, "reward_std": 0.11032027006149292, "rewards/verify_math_reward/mean": 0.765625, "rewards/verify_math_reward/std": 0.4238441288471222, "step": 1583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3532.0, "completions/mean_length": 976.3750610351562, "completions/mean_terminated_length": 610.7332153320312, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 14.802332361516035, "grad_norm": 0.15128548443317413, "learning_rate": 1e-06, "loss": -0.0721, "num_tokens": 923990133.0, "reward": 0.6718750596046448, "reward_std": 0.12974917888641357, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1584 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3154.0, "completions/mean_length": 1109.7176513671875, "completions/mean_terminated_length": 674.3772583007812, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 14.811661807580174, "grad_norm": 0.15887951850891113, "learning_rate": 1e-06, "loss": -0.0843, "num_tokens": 924613488.0, "reward": 0.660714328289032, "reward_std": 0.14684438705444336, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 1585 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3648.0, "completions/mean_length": 1017.9699096679688, "completions/mean_terminated_length": 604.9683837890625, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 14.820991253644316, "grad_norm": 0.14667904376983643, "learning_rate": 1e-06, "loss": -0.0426, "num_tokens": 925184069.0, "reward": 0.691964328289032, "reward_std": 0.1028788611292839, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 1586 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3848.0, "completions/mean_length": 1110.227783203125, "completions/mean_terminated_length": 639.6021118164062, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 14.830320699708455, "grad_norm": 0.15516255795955658, "learning_rate": 1e-06, "loss": -0.0392, "num_tokens": 925764297.0, "reward": 0.6640625, "reward_std": 0.134141206741333, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1587 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2970.0, "completions/mean_length": 851.1964721679688, "completions/mean_terminated_length": 554.777099609375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 14.839650145772595, "grad_norm": 0.15170130133628845, "learning_rate": 1e-06, "loss": -0.0872, "num_tokens": 926312313.0, "reward": 0.7477678656578064, "reward_std": 0.13301284611225128, "rewards/verify_math_reward/mean": 0.7477678656578064, "rewards/verify_math_reward/std": 0.4345363676548004, "step": 1588 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3927.0, "completions/mean_length": 1040.1239013671875, "completions/mean_terminated_length": 664.8408203125, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 14.848979591836734, "grad_norm": 0.14962172508239746, "learning_rate": 1e-06, "loss": -0.0603, "num_tokens": 926931664.0, "reward": 0.6640625, "reward_std": 0.14266708493232727, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1589 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3939.0, "completions/mean_length": 1059.352783203125, "completions/mean_terminated_length": 634.3765869140625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 14.858309037900874, "grad_norm": 0.16940048336982727, "learning_rate": 1e-06, "loss": -0.0373, "num_tokens": 927523068.0, "reward": 0.6640625, "reward_std": 0.1179899051785469, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1590 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2574.0, "completions/mean_length": 929.1964721679688, "completions/mean_terminated_length": 566.8258666992188, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 14.867638483965015, "grad_norm": 0.17912112176418304, "learning_rate": 1e-06, "loss": -0.0608, "num_tokens": 928067012.0, "reward": 0.7176339626312256, "reward_std": 0.149361714720726, "rewards/verify_math_reward/mean": 0.7176339030265808, "rewards/verify_math_reward/std": 0.4504019320011139, "step": 1591 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2960.0, "completions/mean_length": 809.8939819335938, "completions/mean_terminated_length": 548.5891723632812, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 14.876967930029155, "grad_norm": 0.15511579811573029, "learning_rate": 1e-06, "loss": -0.051, "num_tokens": 928610173.0, "reward": 0.785714328289032, "reward_std": 0.10867056250572205, "rewards/verify_math_reward/mean": 0.7857142686843872, "rewards/verify_math_reward/std": 0.4105550944805145, "step": 1592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3541.0, "completions/mean_length": 1074.6038818359375, "completions/mean_terminated_length": 620.8126220703125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 14.886297376093294, "grad_norm": 0.2319992035627365, "learning_rate": 1e-06, "loss": -0.0489, "num_tokens": 929190018.0, "reward": 0.6696428656578064, "reward_std": 0.13873010873794556, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 1593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3720.0, "completions/mean_length": 1005.6038208007812, "completions/mean_terminated_length": 559.60791015625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 14.895626822157434, "grad_norm": 0.12457224726676941, "learning_rate": 1e-06, "loss": -0.0491, "num_tokens": 929720471.0, "reward": 0.660714328289032, "reward_std": 0.06707222014665604, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 1594 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 1016.7779541015625, "completions/mean_terminated_length": 603.616455078125, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 14.904956268221575, "grad_norm": 0.1526627540588379, "learning_rate": 1e-06, "loss": -0.0253, "num_tokens": 930287960.0, "reward": 0.684151828289032, "reward_std": 0.11501792818307877, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 1595 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3631.0, "completions/mean_length": 953.036865234375, "completions/mean_terminated_length": 636.423828125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 14.914285714285715, "grad_norm": 0.15698200464248657, "learning_rate": 1e-06, "loss": -0.0768, "num_tokens": 930891705.0, "reward": 0.7131696939468384, "reward_std": 0.1341080218553543, "rewards/verify_math_reward/mean": 0.7131696343421936, "rewards/verify_math_reward/std": 0.4525342881679535, "step": 1596 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2859.0, "completions/mean_length": 901.86279296875, "completions/mean_terminated_length": 536.3644409179688, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 14.923615160349854, "grad_norm": 0.1555425226688385, "learning_rate": 1e-06, "loss": -0.0527, "num_tokens": 931404622.0, "reward": 0.762276828289032, "reward_std": 0.096761554479599, "rewards/verify_math_reward/mean": 0.7622767686843872, "rewards/verify_math_reward/std": 0.42592647671699524, "step": 1597 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3426.0, "completions/mean_length": 1003.1194458007812, "completions/mean_terminated_length": 610.1874389648438, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 14.932944606413994, "grad_norm": 0.1556512713432312, "learning_rate": 1e-06, "loss": -0.0816, "num_tokens": 931973705.0, "reward": 0.7020089626312256, "reward_std": 0.1509779393672943, "rewards/verify_math_reward/mean": 0.7020089030265808, "rewards/verify_math_reward/std": 0.45763099193573, "step": 1598 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3186.0, "completions/mean_length": 1009.4576416015625, "completions/mean_terminated_length": 586.4288940429688, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 14.942274052478133, "grad_norm": 0.14617133140563965, "learning_rate": 1e-06, "loss": -0.0453, "num_tokens": 932528283.0, "reward": 0.699776828289032, "reward_std": 0.10595890879631042, "rewards/verify_math_reward/mean": 0.6997767686843872, "rewards/verify_math_reward/std": 0.4586109220981598, "step": 1599 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3293.0, "completions/mean_length": 1177.529052734375, "completions/mean_terminated_length": 659.797607421875, "completions/min_length": 193.0, "completions/min_terminated_length": 193.0, "epoch": 14.951603498542275, "grad_norm": 0.1690528392791748, "learning_rate": 1e-06, "loss": -0.0656, "num_tokens": 933130789.0, "reward": 0.606026828289032, "reward_std": 0.14586800336837769, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 1600 }, { "epoch": 14.951603498542275, "step": 1600, "total_flos": 0.0, "train_loss": -0.0391429264701253, "train_runtime": 211229.3498, "train_samples_per_second": 6.787, "train_steps_per_second": 0.008 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 933130789, "num_train_epochs": 15, "save_steps": 80, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }